Repository: vega/altair Branch: main Commit: 98de0277c912 Files: 535 Total size: 7.1 MB Directory structure: gitextract_edckvuj_/ ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug-report.yml │ │ ├── config.yml │ │ └── feature-request.yml │ ├── dependabot.yaml │ ├── pull_request_template.md │ ├── release.yml │ └── workflows/ │ ├── build-free-threaded.yml │ ├── build.yml │ ├── check-pr.yml │ ├── docbuild.yml │ ├── lint.yml │ └── weekly.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTES_FOR_MAINTAINERS.md ├── README.md ├── RELEASING.md ├── altair/ │ ├── __init__.py │ ├── _magics.py │ ├── datasets/ │ │ ├── __init__.py │ │ ├── _cache.py │ │ ├── _constraints.py │ │ ├── _data.py │ │ ├── _exceptions.py │ │ ├── _loader.py │ │ ├── _metadata/ │ │ │ └── metadata.parquet │ │ ├── _reader.py │ │ ├── _readimpl.py │ │ └── _typing.py │ ├── expr/ │ │ ├── __init__.py │ │ ├── consts.py │ │ ├── core.py │ │ └── funcs.py │ ├── jupyter/ │ │ ├── __init__.py │ │ ├── js/ │ │ │ ├── README.md │ │ │ └── index.js │ │ └── jupyter_chart.py │ ├── py.typed │ ├── theme.py │ ├── typing/ │ │ └── __init__.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── _dfi_types.py │ │ ├── _importers.py │ │ ├── _show.py │ │ ├── _transformed_data.py │ │ ├── _vegafusion_data.py │ │ ├── compiler.py │ │ ├── core.py │ │ ├── data.py │ │ ├── deprecation.py │ │ ├── display.py │ │ ├── execeval.py │ │ ├── html.py │ │ ├── mimebundle.py │ │ ├── plugin_registry.py │ │ ├── save.py │ │ ├── schemapi.py │ │ ├── selection.py │ │ └── server.py │ └── vegalite/ │ ├── __init__.py │ ├── api.py │ ├── data.py │ ├── display.py │ ├── schema.py │ └── v6/ │ ├── __init__.py │ ├── api.py │ ├── compiler.py │ ├── data.py │ ├── display.py │ ├── schema/ │ │ ├── __init__.py │ │ ├── _config.py │ │ ├── _typing.py │ │ ├── channels.py │ │ ├── core.py │ │ ├── mixins.py │ │ ├── vega-lite-schema.json │ │ └── vega-themes.json │ └── theme.py ├── doc/ │ ├── .gitignore │ ├── _static/ │ │ ├── altair-gallery.css │ │ ├── 
altair-plot.css │ │ ├── chart.html │ │ ├── custom.css │ │ └── theme_overrides.css │ ├── _templates/ │ │ ├── class.rst │ │ ├── navbar-project.html │ │ └── sidebar-logo.html │ ├── about/ │ │ ├── citing.rst │ │ ├── code_of_conduct.rst │ │ ├── governance.rst │ │ ├── roadmap.rst │ │ └── versioning.rst │ ├── case_studies/ │ │ ├── exploring-weather.rst │ │ ├── index.rst │ │ └── numpy-tooltip-images.rst │ ├── conf.py │ ├── getting_started/ │ │ ├── getting_help.rst │ │ ├── installation.rst │ │ ├── overview.rst │ │ ├── project_philosophy.rst │ │ ├── resources.rst │ │ └── starting.rst │ ├── index.rst │ ├── releases/ │ │ └── changes.rst │ └── user_guide/ │ ├── api.rst │ ├── compound_charts.rst │ ├── configuration.rst │ ├── custom_renderers.rst │ ├── customization.rst │ ├── data.rst │ ├── data_transformers.rst │ ├── display_frontends.rst │ ├── encodings/ │ │ ├── channel_options.rst │ │ ├── channels.rst │ │ └── index.rst │ ├── interactions/ │ │ ├── bindings_widgets.rst │ │ ├── expressions.rst │ │ ├── index.rst │ │ ├── jupyter_chart.rst │ │ └── parameters.rst │ ├── internals.rst │ ├── large_datasets.rst │ ├── marks/ │ │ ├── arc.rst │ │ ├── area.rst │ │ ├── bar.rst │ │ ├── boxplot.rst │ │ ├── circle.rst │ │ ├── errorband.rst │ │ ├── errorbar.rst │ │ ├── geoshape.rst │ │ ├── image.rst │ │ ├── index.rst │ │ ├── line.rst │ │ ├── point.rst │ │ ├── rect.rst │ │ ├── rule.rst │ │ ├── square.rst │ │ ├── text.rst │ │ ├── tick.rst │ │ └── trail.rst │ ├── saving_charts.rst │ ├── scale_resolve.rst │ ├── times_and_dates.rst │ └── transform/ │ ├── aggregate.rst │ ├── bin.rst │ ├── calculate.rst │ ├── density.rst │ ├── extent.rst │ ├── filter.rst │ ├── flatten.rst │ ├── fold.rst │ ├── impute.rst │ ├── index.rst │ ├── joinaggregate.rst │ ├── loess.rst │ ├── lookup.rst │ ├── pivot.rst │ ├── quantile.rst │ ├── regression.rst │ ├── sample.rst │ ├── stack.rst │ ├── timeunit.rst │ └── window.rst ├── paper/ │ ├── paper.bib │ └── paper.md ├── pyproject.toml ├── sphinxext/ │ ├── __init__.py │ ├── 
altairgallery.py │ ├── code_ref.py │ ├── schematable.py │ └── utils.py ├── tests/ │ ├── __init__.py │ ├── altair_theme_test.py │ ├── examples_arguments_syntax/ │ │ ├── __init__.py │ │ ├── airport_connections.py │ │ ├── annual_weather_heatmap.py │ │ ├── anscombe_plot.py │ │ ├── area_chart_gradient.py │ │ ├── area_faceted.py │ │ ├── bar_and_line_with_dual_axis.py │ │ ├── bar_chart_faceted_compact.py │ │ ├── bar_chart_horizontal.py │ │ ├── bar_chart_sorted.py │ │ ├── bar_chart_with_highlighted_bar.py │ │ ├── bar_chart_with_highlighted_segment.py │ │ ├── bar_chart_with_labels.py │ │ ├── bar_chart_with_labels_measured_luminance.py │ │ ├── bar_chart_with_mean_line.py │ │ ├── bar_chart_with_negatives.py │ │ ├── bar_chart_with_range.py │ │ ├── bar_chart_with_single_threshold.py │ │ ├── bar_faceted_stacked.py │ │ ├── bar_rounded.py │ │ ├── bar_with_rolling_mean.py │ │ ├── beckers_barley_facet.py │ │ ├── beckers_barley_wrapped_facet.py │ │ ├── boxplot.py │ │ ├── bubble_plot.py │ │ ├── bump_chart.py │ │ ├── calculate_residuals.py │ │ ├── candlestick_chart.py │ │ ├── choropleth.py │ │ ├── choropleth_repeat.py │ │ ├── co2_concentration.py │ │ ├── comet_chart.py │ │ ├── cumulative_count_chart.py │ │ ├── dendrogram.py │ │ ├── density_repeat.py │ │ ├── density_stack.py │ │ ├── deviation_ellipses.py │ │ ├── distributions_and_medians_of_likert_scale_ratings.py │ │ ├── distributions_faceted_histogram.py │ │ ├── diverging_stacked_bar_chart.py │ │ ├── donut_chart.py │ │ ├── dot_dash_plot.py │ │ ├── empirical_cumulative_distribution_function.py │ │ ├── errorbars_with_ci.py │ │ ├── errorbars_with_std.py │ │ ├── falkensee.py │ │ ├── filled_step_chart.py │ │ ├── gantt_chart.py │ │ ├── gapminder_bubble_plot.py │ │ ├── groupby-map.py │ │ ├── grouped_bar_chart.py │ │ ├── grouped_bar_chart2.py │ │ ├── grouped_bar_chart_horizontal.py │ │ ├── grouped_bar_chart_overlapping_bars.py │ │ ├── grouped_bar_chart_with_error_bars.py │ │ ├── heat_lane.py │ │ ├── hexbins.py │ │ ├── 
histogram_gradient_color.py │ │ ├── histogram_heatmap.py │ │ ├── histogram_responsive.py │ │ ├── histogram_scatterplot.py │ │ ├── histogram_with_a_global_mean_overlay.py │ │ ├── horizon_graph.py │ │ ├── horizontal_stacked_bar_chart.py │ │ ├── interactive_aggregation.py │ │ ├── interactive_bar_select_highlight.py │ │ ├── interactive_brush.py │ │ ├── interactive_column_selection.py │ │ ├── interactive_cross_highlight.py │ │ ├── interactive_layered_crossfilter.py │ │ ├── interactive_legend.py │ │ ├── interactive_reorder_stacked_bars.py │ │ ├── interactive_scatter_plot.py │ │ ├── interval_selection.py │ │ ├── interval_selection_map_quakes.py │ │ ├── iowa_electricity.py │ │ ├── isotype.py │ │ ├── isotype_emoji.py │ │ ├── isotype_grid.py │ │ ├── lasagna_plot.py │ │ ├── layer_line_color_rule.py │ │ ├── layered_area_chart.py │ │ ├── layered_bar_chart.py │ │ ├── layered_chart_bar_mark.py │ │ ├── layered_chart_with_dual_axis.py │ │ ├── layered_heatmap_text.py │ │ ├── layered_histogram.py │ │ ├── line_chart_with_arrows.py │ │ ├── line_chart_with_color_datum.py │ │ ├── line_chart_with_cumsum.py │ │ ├── line_chart_with_cumsum_faceted.py │ │ ├── line_chart_with_custom_legend.py │ │ ├── line_chart_with_datum.py │ │ ├── line_chart_with_generator.py │ │ ├── line_chart_with_interpolation.py │ │ ├── line_chart_with_points.py │ │ ├── line_chart_with_points_stroked.py │ │ ├── line_custom_order.py │ │ ├── line_percent.py │ │ ├── line_with_ci.py │ │ ├── line_with_last_value_labeled.py │ │ ├── line_with_log_scale.py │ │ ├── london_tube.py │ │ ├── maps_faceted_species.py │ │ ├── mosaic_with_labels.py │ │ ├── multi_series_line.py │ │ ├── multifeature_scatter_plot.py │ │ ├── multiline_highlight.py │ │ ├── multiline_tooltip.py │ │ ├── multiline_tooltip_standard.py │ │ ├── multiple_interactions.py │ │ ├── natural_disasters.py │ │ ├── normalized_stacked_area_chart.py │ │ ├── normalized_stacked_bar_chart.py │ │ ├── normed_parallel_coordinates.py │ │ ├── one_dot_per_zipcode.py │ │ ├── 
pacman_chart.py │ │ ├── parallel_coordinates.py │ │ ├── percentage_of_total.py │ │ ├── pie_chart.py │ │ ├── pie_chart_with_labels.py │ │ ├── point_map.py │ │ ├── polar_bar_chart.py │ │ ├── poly_fit_regression.py │ │ ├── pyramid.py │ │ ├── radial_chart.py │ │ ├── ranged_dot_plot.py │ │ ├── ridgeline_plot.py │ │ ├── scatter_faceted.py │ │ ├── scatter_href.py │ │ ├── scatter_linked_brush.py │ │ ├── scatter_linked_table.py │ │ ├── scatter_marginal_hist.py │ │ ├── scatter_matrix.py │ │ ├── scatter_point_paths_hover.py │ │ ├── scatter_qq.py │ │ ├── scatter_tooltips.py │ │ ├── scatter_with_histogram.py │ │ ├── scatter_with_labels.py │ │ ├── scatter_with_layered_histogram.py │ │ ├── scatter_with_loess.py │ │ ├── scatter_with_minimap.py │ │ ├── scatter_with_rolling_mean.py │ │ ├── scatter_with_shaded_area.py │ │ ├── seattle_weather_interactive.py │ │ ├── select_detail.py │ │ ├── select_mark_area.py │ │ ├── selection_histogram.py │ │ ├── selection_layer_bar_month.py │ │ ├── selection_zorder.py │ │ ├── simple_bar_chart.py │ │ ├── simple_heatmap.py │ │ ├── simple_histogram.py │ │ ├── simple_line_chart.py │ │ ├── simple_scatter_with_errorbars.py │ │ ├── simple_stacked_area_chart.py │ │ ├── slider_cutoff.py │ │ ├── slope_graph.py │ │ ├── sorted_error_bars_with_ci.py │ │ ├── stacked_bar_chart.py │ │ ├── stacked_bar_chart_sorted_segments.py │ │ ├── stacked_bar_chart_with_text.py │ │ ├── stem_and_leaf.py │ │ ├── step_chart.py │ │ ├── streamgraph.py │ │ ├── strip_plot.py │ │ ├── strip_plot_jitter.py │ │ ├── table_bubble_plot_github.py │ │ ├── top_k_items.py │ │ ├── top_k_letters.py │ │ ├── top_k_with_others.py │ │ ├── trail_marker.py │ │ ├── us_employment.py │ │ ├── us_incomebrackets_by_state_facet.py │ │ ├── us_population_over_time.py │ │ ├── us_population_over_time_facet.py │ │ ├── us_population_pyramid_over_time.py │ │ ├── us_state_capitals.py │ │ ├── violin_plot.py │ │ ├── waterfall_chart.py │ │ ├── wheat_wages.py │ │ ├── wilkinson-dot-plot.py │ │ ├── wind_vector_map.py │ │ ├── 
window_rank.py │ │ ├── world_map.py │ │ └── world_projections.py │ ├── examples_methods_syntax/ │ │ ├── __init__.py │ │ ├── airport_connections.py │ │ ├── annual_weather_heatmap.py │ │ ├── anscombe_plot.py │ │ ├── area_faceted.py │ │ ├── bar_chart_faceted_compact.py │ │ ├── bar_chart_sorted.py │ │ ├── bar_chart_with_labels_measured_luminance.py │ │ ├── bar_chart_with_range.py │ │ ├── bar_chart_with_single_threshold.py │ │ ├── beckers_barley_facet.py │ │ ├── beckers_barley_wrapped_facet.py │ │ ├── bump_chart.py │ │ ├── calculate_residuals.py │ │ ├── candlestick_chart.py │ │ ├── co2_concentration.py │ │ ├── comet_chart.py │ │ ├── cumulative_count_chart.py │ │ ├── density_repeat.py │ │ ├── density_stack.py │ │ ├── deviation_ellipses.py │ │ ├── distributions_and_medians_of_likert_scale_ratings.py │ │ ├── distributions_faceted_histogram.py │ │ ├── diverging_stacked_bar_chart.py │ │ ├── donut_chart.py │ │ ├── errorbars_with_ci.py │ │ ├── errorbars_with_std.py │ │ ├── falkensee.py │ │ ├── gapminder_bubble_plot.py │ │ ├── groupby-map.py │ │ ├── grouped_bar_chart2.py │ │ ├── grouped_bar_chart_overlapping_bars.py │ │ ├── grouped_bar_chart_with_error_bars.py │ │ ├── heat_lane.py │ │ ├── hexbins.py │ │ ├── histogram_gradient_color.py │ │ ├── histogram_heatmap.py │ │ ├── histogram_responsive.py │ │ ├── histogram_scatterplot.py │ │ ├── histogram_with_a_global_mean_overlay.py │ │ ├── horizon_graph.py │ │ ├── interactive_aggregation.py │ │ ├── interactive_bar_select_highlight.py │ │ ├── interactive_column_selection.py │ │ ├── interactive_cross_highlight.py │ │ ├── interactive_layered_crossfilter.py │ │ ├── interactive_legend.py │ │ ├── interval_selection.py │ │ ├── interval_selection_map_quakes.py │ │ ├── iowa_electricity.py │ │ ├── isotype.py │ │ ├── isotype_emoji.py │ │ ├── isotype_grid.py │ │ ├── lasagna_plot.py │ │ ├── layered_area_chart.py │ │ ├── layered_bar_chart.py │ │ ├── layered_chart_with_dual_axis.py │ │ ├── layered_heatmap_text.py │ │ ├── layered_histogram.py │ │ ├── 
line_chart_with_color_datum.py │ │ ├── line_chart_with_cumsum.py │ │ ├── line_chart_with_cumsum_faceted.py │ │ ├── line_chart_with_custom_legend.py │ │ ├── line_custom_order.py │ │ ├── line_percent.py │ │ ├── line_with_ci.py │ │ ├── line_with_last_value_labeled.py │ │ ├── line_with_log_scale.py │ │ ├── london_tube.py │ │ ├── maps_faceted_species.py │ │ ├── mosaic_with_labels.py │ │ ├── multifeature_scatter_plot.py │ │ ├── multiline_highlight.py │ │ ├── multiline_tooltip.py │ │ ├── multiline_tooltip_standard.py │ │ ├── multiple_interactions.py │ │ ├── natural_disasters.py │ │ ├── normalized_stacked_area_chart.py │ │ ├── normalized_stacked_bar_chart.py │ │ ├── pacman_chart.py │ │ ├── parallel_coordinates.py │ │ ├── percentage_of_total.py │ │ ├── pie_chart.py │ │ ├── pie_chart_with_labels.py │ │ ├── polar_bar_chart.py │ │ ├── poly_fit_regression.py │ │ ├── pyramid.py │ │ ├── radial_chart.py │ │ ├── ranged_dot_plot.py │ │ ├── ridgeline_plot.py │ │ ├── scatter_linked_table.py │ │ ├── scatter_marginal_hist.py │ │ ├── scatter_point_paths_hover.py │ │ ├── scatter_with_layered_histogram.py │ │ ├── scatter_with_minimap.py │ │ ├── scatter_with_rolling_mean.py │ │ ├── seattle_weather_interactive.py │ │ ├── select_detail.py │ │ ├── simple_scatter_with_errorbars.py │ │ ├── sorted_error_bars_with_ci.py │ │ ├── stacked_bar_chart_sorted_segments.py │ │ ├── stacked_bar_chart_with_text.py │ │ ├── stem_and_leaf.py │ │ ├── streamgraph.py │ │ ├── strip_plot_jitter.py │ │ ├── top_k_items.py │ │ ├── top_k_letters.py │ │ ├── top_k_with_others.py │ │ ├── us_employment.py │ │ ├── us_population_over_time.py │ │ ├── us_population_over_time_facet.py │ │ ├── us_population_pyramid_over_time.py │ │ ├── us_state_capitals.py │ │ ├── violin_plot.py │ │ ├── wheat_wages.py │ │ ├── wilkinson-dot-plot.py │ │ ├── wind_vector_map.py │ │ └── window_rank.py │ ├── expr/ │ │ ├── __init__.py │ │ └── test_expr.py │ ├── test_datasets.py │ ├── test_examples.py │ ├── test_jupyter_chart.py │ ├── test_magics.py │ ├── 
test_toplevel.py │ ├── test_transformed_data.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── test_compiler.py │ │ ├── test_core.py │ │ ├── test_data.py │ │ ├── test_deprecation.py │ │ ├── test_execeval.py │ │ ├── test_html.py │ │ ├── test_mimebundle.py │ │ ├── test_plugin_registry.py │ │ ├── test_schemapi.py │ │ ├── test_server.py │ │ ├── test_to_values_narwhals.py │ │ └── test_utils.py │ └── vegalite/ │ ├── __init__.py │ ├── test_common.py │ └── v6/ │ ├── __init__.py │ ├── schema/ │ │ ├── __init__.py │ │ └── test_channels.py │ ├── test_alias.py │ ├── test_api.py │ ├── test_data.py │ ├── test_display.py │ ├── test_geo_interface.py │ ├── test_layer_props.py │ ├── test_params.py │ ├── test_renderers.py │ └── test_theme.py └── tools/ ├── __init__.py ├── cleanup_nightlies.py ├── codemod.py ├── datasets/ │ ├── __init__.py │ ├── datapackage.py │ ├── models.py │ └── npm.py ├── fs.py ├── generate_api_docs.py ├── generate_nightly_version.py ├── generate_schema_wrapper.py ├── markup.py ├── schemapi/ │ ├── __init__.py │ ├── codegen.py │ ├── schemapi.py │ └── utils.py ├── sync_website.py ├── update_init_file.py ├── vega_expr.py └── versioning.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ * text=auto eol=lf ================================================ FILE: .github/ISSUE_TEMPLATE/bug-report.yml ================================================ name: Bug report description: Report something that is broken labels: ["bug", "needs-triage"] body: - type: markdown attributes: value: | Thanks for taking the time to fill out this bug report! Note that since Altair is a Python wrapper around the Vega-Lite visualization grammar, [most bugs should be reported directly to Vega-Lite](https://github.com/vega/vega-lite/issues). 
You can click the Action Button (`...`) of your Altair chart and "Open in Vega Editor" to see if you get the same error in the Vega Editor. If you can't reproduce the bug in the Vega Editor, then make sure you are using [the latest version of Altair](https://github.com/vega/altair/releases) and search for duplicate issues before filling out the form below. - type: textarea id: what-happened attributes: label: What happened? description: | Describe what happened and how to reproduce the bug. Include the full code and data to reproduce it. Use a simple toy data set, e.g. from `altair.datasets`. validations: required: true - type: textarea id: desired-behavior attributes: label: What would you like to happen instead? description: Describe the expected/desired behavior. - type: input id: altair-version attributes: label: Which version of Altair are you using? description: Use `alt.__version__` to find out ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Question url: https://stackoverflow.com/tags/altair about: Please ask questions such as "How do I do X?" or "Why does this not work?" on Stack Overflow using the `altair` tag. - name: Discussion url: https://github.com/vega/altair/discussions about: If you want to discuss a topic or ask a question that is not a good fit for Stack Overflow, please open a new disscussion here on GitHub. ================================================ FILE: .github/ISSUE_TEMPLATE/feature-request.yml ================================================ name: Feature request description: Suggest an improvement labels: ["enhancement"] body: - type: markdown attributes: value: | Thanks for taking the time to suggest a new feature! 
Note that since Altair is a Python wrapper around the Vega-Lite visualization grammar, [most feature requests should be reported directly to Vega-Lite](https://github.com/vega/vega-lite/issues). You can click the Action Button (`...`) of your Altair chart and "Open in Vega Editor" to see the Vega-Lite chart specification. If you believe this feature is more relevant for Altair than Vega-Lite, then make sure to search for duplicate issues before filling out the form below. - type: textarea id: what-happened attributes: label: What is your suggestion? description: Describe the feature's goal, motivating use cases, and its expected behavior. validations: required: true - type: textarea id: alternative-solutions attributes: label: Have you considered any alternative solutions? ================================================ FILE: .github/dependabot.yaml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "monthly" groups: github-actions: patterns: - "*" commit-message: prefix: "ci" ================================================ FILE: .github/pull_request_template.md ================================================ ## Thanks for contributing to Altair! 🎉 Please follow these guidelines: ### 1. **PR Description** - Briefly describe the changes and their purpose. For help, check this [guide](https://medium.com/@greenberg/writing-pull-requests-your-coworkers-might-enjoy-reading-9d0307e93da3). ### 2. **Tests & Docs** - Include unit tests and update documentation for new features. ### 3. **Commit Message** - Use [semantic commit messages](https://www.conventionalcommits.org/), e.g., `"feat: Add embed_options to charts"`. - Add `!` for breaking changes (e.g., `"fix!: Raise error when embed_options is None"`). ### 4. 
**PR Title Types** - **feat**: New feature - **fix**: Bug fix - **docs**: Documentation changes - **style**: Code style changes (no functionality change) - **refactor**: Code restructuring - **perf**: Performance improvements - **test**: Add or fix tests - **build**: Changes to build system or dependencies - **ci**: CI configuration changes - **chore**: Miscellaneous tasks - **revert**: Reverts a commit ================================================ FILE: .github/release.yml ================================================ changelog: categories: - title: Breaking labels: - breaking - title: Deprecation labels: - deprecation - title: Enhancements labels: - enhancement - title: Bug Fixes labels: - bug - title: Maintainance labels: - maintainance - title: Documentation labels: - documentation - title: Other Changes labels: - "*" ================================================ FILE: .github/workflows/build-free-threaded.yml ================================================ name: build-free-threaded on: workflow_dispatch: # Manual trigger only; no automatic CI runs env: UV_SYSTEM_PYTHON: 1 jobs: free-threaded: runs-on: ubuntu-latest name: py 3.14t free-threaded steps: - uses: actions/checkout@v6 - name: Set up Python 3.14t uses: actions/setup-python@v6 with: python-version: "3.14t" - name: Install uv uses: astral-sh/setup-uv@v7 - name: Install targeted dependencies run: | set -xe uv pip install -e . 
# From [project.optional-dependencies].dev in pyproject.toml uv pip install pytest uv pip install pytest-cov uv pip install pytest-run-parallel # uv pip install hatch>=1.13.0 # uv pip install ruff>=0.9.5 # uv pip install 'duckdb>=1.0; python_version<"3.14"' # uv pip install ipython # uv pip install ipykernel # uv pip install pandas>=1.1.3 # uv pip install pyarrow-stubs # uv pip install 'pytest-xdist[psutil]~=3.5' # uv pip install mistune # uv pip install mypy # uv pip install pandas-stubs # uv pip install types-jsonschema # uv pip install types-setuptools # uv pip install geopandas>=0.14.3 # uv pip install polars>=0.20.3 # uv pip install taskipy>=1.14.1 # uv pip install tomli>=2.2.1 - name: Run pytest with pytest-run-parallel run: | uv run pytest -o addopts= --pyargs --doctest-modules --doctest-ignore-import-errors --iterations=8 --parallel-threads=auto tests ================================================ FILE: .github/workflows/build.yml ================================================ name: build on: [push, pull_request] env: UV_SYSTEM_PYTHON: 1 jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] required-dependencies: ["minimum", "latest"] name: py ${{ matrix.python-version }} with ${{ matrix.required-dependencies }} required deps steps: - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install uv uses: astral-sh/setup-uv@v7 # Install dependencies from pyproject.toml (latest versions for all matrix jobs) - name: Install dependencies run: uv pip install -e ".[dev, all]" # Install minimum required versions for compatibility testing (only for 'minimum' matrix value) - name: Pin minimum dependency versions if: ${{ matrix.required-dependencies == 'minimum' }} run: | # Keep in sync with versions in `pyproject.toml` uv pip install jsonschema==3.0 narwhals==1.27.1 typing_extensions==4.12.0 
setuptools==81.0.0 - name: Maybe uninstall optional dependencies # We uninstall pyarrow and vegafusion for one job to test that we have not # accidentally introduced a hard dependency on these libraries. # Uninstalling for Python 3.10 is an arbitrary choice. # Also see https://github.com/vega/altair/pull/3114 if: ${{ matrix.python-version == '3.10' }} run: | uv pip uninstall pyarrow vegafusion vl-convert-python anywidget - name: Maybe install lowest supported pandas version # We install the lowest supported pandas version for one job to test that # it still works. Downgrade to the oldest versions of pandas and numpy that include # Python 3.10 wheels, so only run this job for Python 3.10 if: ${{ matrix.python-version == '3.10' }} run: | uv pip install pandas==1.3.4 numpy==1.21.2 - name: Test that schema generation has no effect run: | uv pip install vl-convert-python python tools/generate_schema_wrapper.py # This gets the paths of all files which were either deleted, modified # or are not yet tracked by Git files=$(git ls-files --deleted --modified --others --exclude-standard) # Exclude dataset metadata that is regenerated by the script; parquet output # can differ across platforms/Polars versions (binary non-determinism). 
exclude_pattern='altair/datasets/_metadata/metadata\.parquet' files_filtered=$(echo "$files" | grep -v -E "^${exclude_pattern}$" || true) # Depending on the shell it can happen that 'files' contains empty # lines which are filtered out in the for loop below files_cleaned=() while IFS= read -r i; do # Skip empty items [ -z "$i" ] && continue files_cleaned+=("$i") done <<< "$files_filtered" if [ ${#files_cleaned[@]} -gt 0 ]; then echo "The code generation modified the following files:" printf '%s\n' "${files_cleaned[@]}" git diff exit 1 fi - name: Test with pytest run: | uv run pytest --pyargs --numprocesses=logical --doctest-modules --doctest-ignore-import-errors tests - name: Validate Vega-Lite schema run: | # We install all 'format' dependencies of jsonschema as check-jsonschema # only does the 'format' checks which are installed. # We can always use the latest jsonschema version here. # uri-reference check is disabled as the URIs in the Vega-Lite schema do # not conform RFC 3986. uv pip install 'jsonschema[format]' check-jsonschema --upgrade uv run check-jsonschema --check-metaschema altair/vegalite/v6/schema/vega-lite-schema.json --disable-formats uri-reference - name: Show installed versions run: uv pip list ================================================ FILE: .github/workflows/check-pr.yml ================================================ name: "Lint PR" on: pull_request_target: types: - opened - edited - synchronize permissions: pull-requests: read jobs: main: name: Validate PR title runs-on: ubuntu-latest steps: - uses: amannn/action-semantic-pull-request@v6 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/docbuild.yml ================================================ name: docbuild on: [push, pull_request] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Python 3.12 uses: actions/setup-python@v6 with: python-version: "3.12" - name: Install uv uses: 
astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: | **/uv.lock **/pyproject.toml - name: Install dependencies run: uv sync --all-extras - name: Build docs run: | mkdir -p doc/_images uv run sphinx-build -b html -d doc/_build/doctrees doc doc/_build/html - name: Run doctests run: | uv run sphinx-build -b doctest -d doc/_build/doctrees doc doc/_build/doctest ================================================ FILE: .github/workflows/lint.yml ================================================ name: lint on: [push, pull_request] jobs: build: runs-on: ubuntu-latest name: ruff-mypy steps: - name: "Set up Python" uses: actions/setup-python@v6 with: python-version: "3.12" - uses: actions/checkout@v6 - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: | **/uv.lock **/pyproject.toml # Installing all dependencies and not just the linters as mypy needs them for type checking - name: Install dependencies run: uv sync --all-extras - name: ruff check (lint) run: | uv run ruff check - name: ruff format run: | uv run ruff format --check --diff - name: mypy (type check) run: | uv run mypy altair tests ================================================ FILE: .github/workflows/weekly.yml ================================================ --- name: Weekly Build and Publish 'on': schedule: - cron: '0 3 * * 1' # Weekly on Mondays at 3am UTC workflow_dispatch: inputs: force_build: description: 'Force build even if no changes detected' type: boolean default: false required: false env: UV_SYSTEM_PYTHON: 1 jobs: check-changes: runs-on: ubuntu-latest outputs: has-changes: '${{ steps.check.outputs.has-changes }}' last-weekly-tag: '${{ steps.check.outputs.last-weekly-tag }}' current-commit: '${{ steps.check.outputs.current-commit }}' steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Check for changes since last weekly build id: check run: | # Get the latest weekly release tag LATEST_WEEKLY_TAG=$(git tag --list "weekly-*" 
--sort=-version:refname | head -n 1) if [ -z "$LATEST_WEEKLY_TAG" ]; then echo "No weekly tags found, will build (first time)" echo "has-changes=true" >> $GITHUB_OUTPUT echo "last-weekly-tag=" >> $GITHUB_OUTPUT echo "current-commit=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT exit 0 fi # Get the commit hash from the latest weekly tag LATEST_WEEKLY_COMMIT=$(git rev-list -n 1 "$LATEST_WEEKLY_TAG") CURRENT_COMMIT=$(git rev-parse HEAD) echo "last-weekly-tag=${LATEST_WEEKLY_TAG}" >> $GITHUB_OUTPUT echo "current-commit=${CURRENT_COMMIT}" >> $GITHUB_OUTPUT # Check if there are any commits since the last weekly build if [ "$LATEST_WEEKLY_COMMIT" = "$CURRENT_COMMIT" ]; then echo "No changes since last weekly build" echo "has-changes=false" >> $GITHUB_OUTPUT else echo "Changes detected since last weekly build" echo "has-changes=true" >> $GITHUB_OUTPUT fi weekly-build: needs: check-changes if: needs.check-changes.outputs.has-changes == 'true' || github.event.inputs.force_build == 'true' runs-on: ubuntu-latest strategy: matrix: python-version: - '3.10' - '3.11' - '3.12' - '3.13' name: 'Test py ${{ matrix.python-version }}' steps: - uses: actions/checkout@v6 - name: 'Set up Python ${{ matrix.python-version }}' uses: actions/setup-python@v6 with: python-version: '${{ matrix.python-version }}' - name: Install uv uses: astral-sh/setup-uv@v7 with: enable-cache: true cache-dependency-glob: | **/uv.lock **/pyproject.toml - name: Install dependencies run: | uv sync --all-extras - name: Test with pytest run: | uv run task test weekly-publish: needs: - check-changes - weekly-build if: needs.check-changes.outputs.has-changes == 'true' || github.event.inputs.force_build == 'true' runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/altair permissions: contents: write id-token: write steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install uv uses: astral-sh/setup-uv@v7 with: 
enable-cache: true cache-dependency-glob: | **/uv.lock **/pyproject.toml - name: Install dependencies run: | uv sync --all-extras - name: Generate weekly version and tag id: version run: | # Generate weekly version based on current date and commit # Get base version from altair/__init__.py BASE_VERSION=$(grep '__version__ = ' altair/__init__.py | sed 's/__version__ = "\(.*\)"/\1/') DATE=$(date +%Y%m%d) COMMIT=$(git rev-parse --short HEAD) COMMIT_SHA=$(git rev-parse HEAD) # PEP 440 compliant dev version - handle cases where base version already has 'dev' if [[ "$BASE_VERSION" == *"dev" ]]; then # If base version already has 'dev', replace it with proper dev format VERSION=$(echo "$BASE_VERSION" | sed 's/dev$/.dev'${DATE}'/') else # If base version doesn't have 'dev', add it VERSION="${BASE_VERSION}.dev${DATE}" fi TAG_NAME="weekly-${DATE}-${COMMIT}" echo "Generated weekly version: ${VERSION}" echo "Generated tag name: ${TAG_NAME}" echo "version=${VERSION}" >> $GITHUB_OUTPUT echo "tag_name=${TAG_NAME}" >> $GITHUB_OUTPUT echo "commit_sha=${COMMIT_SHA}" >> $GITHUB_OUTPUT - name: Update version files run: | # Update version in __init__.py sed -i "s/__version__ = .*/__version__ = \"${{ steps.version.outputs.version }}\"/" altair/__init__.py # Update version in conf.py sed -i "s/release = .*/release = \"${{ steps.version.outputs.version }}\"/" doc/conf.py - name: Build package run: | uv run task build - name: Generate dependency snapshot id: deps run: | # Get current dependencies uv pip freeze > current_deps.txt # Check if we can compare with previous weekly release LATEST_WEEKLY_TAG="${{ needs.check-changes.outputs.last-weekly-tag }}" if [ -n "$LATEST_WEEKLY_TAG" ]; then echo "Comparing dependencies with previous tag: ${LATEST_WEEKLY_TAG}" # Try to get previous uv.lock from git history if git show ${LATEST_WEEKLY_TAG}:uv.lock > previous_uv.lock 2>/dev/null; then if diff -u previous_uv.lock uv.lock > dependency_changes.txt 2>&1; then echo "No dependency changes detected" 
echo "dependency_changes=false" >> $GITHUB_OUTPUT # Ensure the artifact has content even when there are no changes if [ ! -s dependency_changes.txt ]; then echo "No dependency changes detected" > dependency_changes.txt fi else echo "Dependency changes detected" echo "dependency_changes=true" >> $GITHUB_OUTPUT fi else echo "No previous uv.lock found at ${LATEST_WEEKLY_TAG}" > dependency_changes.txt echo "dependency_changes=false" >> $GITHUB_OUTPUT fi else echo "First weekly build - no previous dependencies to compare" > dependency_changes.txt echo "dependency_changes=false" >> $GITHUB_OUTPUT fi - name: Generate binary file checksums id: checksums run: | # Find all binary files in the project (with proper parentheses for OR operations) find . \( -name "*.csv.gz" -o -name "*.parquet" -o -name "*.json.gz" \) -type f | while read file; do sha256sum "$file" >> binary_checksums.txt echo "Processed: $file" done # Ensure file exists even if no binary files found touch binary_checksums.txt # Note: We cannot compare with previous checksums since binary_checksums.txt # is generated during the workflow and not committed to git. # Instead, we just upload the current checksums for reference. 
echo "Generated checksums for $(wc -l < binary_checksums.txt) binary files" > binary_changes.txt echo "binary_changes=false" >> $GITHUB_OUTPUT - name: Prepare release assets run: | # Ensure all text files exist to avoid upload errors touch dependency_changes.txt current_deps.txt binary_checksums.txt binary_changes.txt # List files that will be uploaded echo "Release assets:" ls -lh dist/ ls -lh *.txt 2>/dev/null || echo "No text files" - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: verbose: true skip-existing: true - name: Create GitHub release uses: softprops/action-gh-release@v2 with: tag_name: ${{ steps.version.outputs.tag_name }} name: Weekly Build ${{ steps.version.outputs.version }} body: | ## Weekly Pre-Release Build of Altair This is a pre-release version for testing purposes. ### Build Information **Version:** ${{ steps.version.outputs.version }} **Tag:** ${{ steps.version.outputs.tag_name }} **Previous Weekly Tag:** ${{ needs.check-changes.outputs.last-weekly-tag || 'None (first build)' }} ## Installation ### From PyPI (recommended) Install the latest weekly build directly from PyPI: ```bash pip install altair==${{ steps.version.outputs.version }} # or uv pip install altair==${{ steps.version.outputs.version }} ``` _Note_: Weekly builds publish timestamped development versions (for example `${{ steps.version.outputs.version }}`) to PyPI. When you pin that exact version, `pip` installs the dev build automatically, without the need for a `--pre` flag. 
### From GitHub Repository (direct install) Install directly from the tagged commit without downloading the wheel: **Command line (pip or uv):** ```bash pip install git+https://github.com/${{ github.repository }}.git@${{ steps.version.outputs.tag_name }} # or uv pip install git+https://github.com/${{ github.repository }}.git@${{ steps.version.outputs.tag_name }} ``` _Note_: Installing directly from the `weekly-...` tag will surface the base development version (without the timestamp suffix) because the version file edits are not committed. **Add to pyproject.toml (pip/uv):** ```toml [project] dependencies = [ "altair @ git+https://github.com/${{ github.repository }}.git@${{ steps.version.outputs.tag_name }}", ] ``` **Add to pixi.toml (pixi):** ```toml [pypi-dependencies] altair = { git = "https://github.com/${{ github.repository }}.git", rev = "${{ steps.version.outputs.tag_name }}" } ``` ### From GitHub Release (manual download) Download the wheel file from the assets below and install: ```bash pip install altair-${{ steps.version.outputs.version }}-py3-none-any.whl ``` ## Testing & Feedback **Please note:** This is a testing version. If you encounter any issues or unexpected behavior, we would greatly appreciate if you [open an issue](https://github.com/${{ github.repository }}/issues) to report it. Your feedback helps improve Altair! draft: false prerelease: true files: | dist/*.whl dist/*.tar.gz dependency_changes.txt current_deps.txt binary_checksums.txt binary_changes.txt env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Cleanup old weekly releases env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | # Wait a moment to ensure the new release is fully created sleep 5 # Get all weekly releases sorted by creation date (newest first) echo "Fetching all weekly releases..." 
WEEKLY_RELEASES=$(gh release list --limit 100 --json tagName,isPrerelease,createdAt --jq '.[] | select(.tagName | startswith("weekly-")) | .tagName' | head -n 100) # Count total weekly releases (handle empty list) if [ -z "$WEEKLY_RELEASES" ]; then echo "No weekly releases found" exit 0 fi TOTAL_WEEKLY=$(echo "$WEEKLY_RELEASES" | wc -l | tr -d ' ') echo "Found ${TOTAL_WEEKLY} weekly releases" # Keep only the 7 most recent, delete the rest if [ "$TOTAL_WEEKLY" -gt 7 ]; then echo "Keeping 7 most recent weekly releases, deleting $(($TOTAL_WEEKLY - 7)) old ones..." echo "$WEEKLY_RELEASES" | tail -n +8 | while read -r tag; do if [ -n "$tag" ]; then echo "Deleting release and tag: $tag" gh release delete "$tag" --yes --cleanup-tag 2>&1 || echo "Warning: Failed to delete $tag, continuing..." fi done echo "Cleanup complete!" else echo "Only ${TOTAL_WEEKLY} weekly releases found, no cleanup needed (keeping max 7)" fi ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] # C extensions *.so # Distribution / packaging .Python env/ venv .venv build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ # emacs backups *~ \#*\# .ipynb_checkpoints .idea/* tools/_build Untitled*.ipynb .mypy* .pytest_cache *.DS_Store # VSCode .vscode # hatch, doc generation data.json # type stubs typings/ # Zed editor .zed ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct As a project of the Vega Organization, we use the [Vega Code of Conduct](https://github.com/vega/.github/blob/main/CODE_OF_CONDUCT.md). ================================================ FILE: CONTRIBUTING.md ================================================ # Feedback and Contribution We welcome any input, feedback, bug reports, and contributions via [Altair's GitHub Repository](http://github.com/vega/altair/). In particular, we welcome companion efforts from other visualization libraries to render the Vega-Lite specifications output by Altair. We see this portion of the effort as much bigger than Altair itself: the Vega and Vega-Lite specifications are perhaps the best existing candidates for a principled *lingua franca* of data visualization. We are also seeking contributions of additional Jupyter notebook-based examples in our separate GitHub repository: https://github.com/altair-viz/altair_notebooks. All contributions, suggestions, and feedback you submitted are accepted under the [Project's license](./LICENSE). You represent that if you do not own copyright in the code that you have the authority to submit it under the [Project's license](./LICENSE). All feedback, suggestions, or contributions are not confidential. 
The Project abides by the Vega Organization's [code of conduct](https://github.com/vega/.github/blob/main/CODE_OF_CONDUCT.md) and [governance](https://github.com/vega/.github/blob/main/project-docs/GOVERNANCE.md). ## How To Contribute Code to Vega-Altair ### Setting Up Your Environment Fork the Altair repository on GitHub and then clone the fork to your local machine. For more details on forking, see the [GitHub Documentation](https://help.github.com/en/articles/fork-a-repo). ```cmd git clone https://github.com/YOUR-USERNAME/altair.git ``` To keep your fork up to date with changes in this repo, you can [use the fetch upstream button on GitHub](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork). [Install `uv`](https://docs.astral.sh/uv/getting-started/installation/), or update to the latest version: ```cmd uv self update ``` Install Python: ```cmd uv python install 3.12 ``` Initialize a new virtual environment: ```cmd cd altair/ uv venv -p 3.12 ``` Activate your environment:
macOS/Linux

```bash source .venv/bin/activate ```

Windows

```cmd .venv\Scripts\activate ```

Install the project with all development dependencies: ```cmd uv sync --all-extras ``` > [!TIP] > If you're new to `uv`, check out their [Getting started](https://docs.astral.sh/uv/getting-started/) guide for help ### Creating a Branch Once your local environment is up-to-date, you can create a new git branch which will contain your contribution (always create a new branch instead of making changes to the main branch): ```cmd git switch -c <branch-name> ``` With this branch checked out, make the desired changes to the package. A large part of Altair's code base is automatically generated. After you have made your manual changes, make sure to run the following to see if there are any changes to the automatically generated files: ```bash uv run task generate-schema-wrapper ``` For information on how to update the Vega-Lite version that Altair uses, please read [the maintainers' notes](NOTES_FOR_MAINTAINERS.md). ### Testing your Changes Before submitting your changes to the main Altair repository, it is recommended that you run the Altair test suite, which includes a number of tests to validate the correctness of your code: ```bash uv run task test ``` This also runs the [`ruff`](https://ruff.rs/) linter and formatter as well as [`mypy`](https://mypy-lang.org/) as the type checker. Study the output of any failed tests and try to fix the issues before proceeding to the next section. #### Failures on specific python version(s) By default, `uv run task test` will run the test suite against the currently active python version. Two useful variants for debugging failures that only appear *after* you've submitted your PR: ```bash # Test against all python version(s) in the matrix uv run task test-all # Test against our minimum required version uv run task test-min ``` See [hatch test](https://hatch.pypa.io/latest/cli/reference/#hatch-test) docs for other options.
#### Changes to `__all__` If `test_completeness_of__all__` fails, you may need to run: ```bash uv run task update-init-file ``` However, this test usually indicates *unintentional* addition(s) to the top-level `alt.` namespace that will need resolving first. ### Creating a Pull Request When you are happy with your changes, you can commit them to your branch by running ```cmd git add <files-that-changed> git commit -m "Some descriptive message about your change" git push origin <branch-name> ``` You will then need to submit a pull request (PR) on GitHub asking to merge your example branch into the main Altair repository. For details on creating a PR see GitHub documentation [Creating a pull request](https://help.github.com/en/articles/creating-a-pull-request). You can add more details about your example in the PR such as motivation for the example or why you thought it would be a good addition. You will get feedback in the PR discussion if anything needs to be changed. To make changes, continue to push commits made in your local example branch to origin and they will be automatically shown in the PR. Hopefully your PR will be answered in a timely manner and your contribution will help others in the future. ## How To Contribute Documentation to Vega-Altair Altair documentation is written in [reStructuredText](http://docutils.sourceforge.net/rst.html) and compiled into html pages using [Sphinx](http://www.sphinx-doc.org/en/master/). Contributing to the documentation requires some extra dependencies and we have some conventions and plugins that are used to help navigate the docs and generate great Altair visualizations. Note that the [Altair website](https://altair-viz.github.io/) is only updated when a new version is released so your contribution might not show up for a while. ### Adding Examples We are always interested in new examples contributed from the community.
These could be everything from simple one-panel scatter and line plots, to more complicated layered or stacked plots, to more advanced interactive features. Before submitting a new example, check the [Altair Example Gallery](https://altair-viz.github.io/gallery/index.html) to make sure that your idea has not already been implemented. Once you have an example you would like to add, there are a few guidelines to follow. Every example should: - have an `arguments_syntax` and `methods_syntax` implementation. Each implementation must be saved as a standalone script in the `tests/examples_arguments_syntax` and `tests/examples_methods_syntax` directories. - have a descriptive docstring, which will eventually be extracted for the documentation website. - contain a category tag. - define a chart variable with the main chart object (This will be used both in the unit tests to confirm that the example executes properly, and also eventually used to display the visualization on the documentation website). - not make any external calls to download data within the script (i.e. don't use urllib). You can define your data directly within the example file, generate your data using pandas and numpy, or you can use data available in the `altair.datasets` module. The easiest way to get started would be to adapt examples from the [Vega-Lite example gallery](https://vega.github.io/vega-lite/examples/) which are missing in the Altair gallery. Or you can feel free to be creative and build your own visualizations. Often it is convenient to draft an example outside of the main repository, such as [Google Colab](https://colab.research.google.com/), to avoid difficulties when working with git. Once you have an example you would like to add, follow the same contribution procedure outlined above. Some additional notes: - The format and style of new contributions should generally match that of existing examples.
- The file docstring will be rendered into HTML via [reStructuredText](http://docutils.sourceforge.net/rst.html), so use that format for any hyperlinks or text styling. In particular, be sure you include a title in the docstring underlined with `---`, and be sure that the size of the underline exactly matches the size of the title text. - If your example fits into a chart type but involves significant configuration it should be in the `Case Studies` category. - For consistency all data used for a visualization should be assigned to the variable `source`. Then `source` is passed to the `alt.Chart` object. If the example requires multiple dataframes then this does not apply. See other examples for guidance. - Example code should not require downloading external datasets. We suggest using the `altair.datasets` module if possible. If you are using the `altair.datasets` module there are multiple ways to refer to a data source. The data can be referenced directly, such as `source = data.penguins()`, or it can be referenced by URL, such as `source = data.movies.url`. This is to ensure that Altair's automated test suite does not depend on availability of external HTTP resources. - If VlConvert does not support PNG export of the chart (e.g. in the case of emoji), then add the name of the example to the `SVG_EXAMPLES` set in `tests/examples_arguments_syntax/__init__.py` and `tests/examples_methods_syntax/__init__.py` ### Building the Documentation Locally The process to build the documentation locally consists of three steps: 1. **Clean** (remove) any previously generated documentation files. 2. **Build** the documentation in HTML format. 3. View the documentation using a *local* Python testing **server**. 
Steps 1 & 2 can be run as a single command, followed by step 3: ```cmd uv run task doc-clean-build uv run task doc-serve ``` > [!TIP] > If these commands were not available for you, make sure you've [set up your environment](#setting-up-your-environment) To view the documentation, open your browser and go to `http://localhost:8000`. To stop the server, use `^C` (control+c) in the terminal. --- Part of MVG-0.1-beta. Made with love by GitHub. Licensed under the [CC-BY 4.0 License](https://creativecommons.org/licenses/by-sa/4.0/). ================================================ FILE: LICENSE ================================================ Copyright (c) 2015-2025, Vega-Altair Developers All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of vega-altair nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: NOTES_FOR_MAINTAINERS.md ================================================ # Notes for Maintainers of Altair ## Auto-generating the Python code The core Python API for Altair can be found in the following locations: - ``altair/vegalite/v6/schema/`` All the files within these directories are created automatically by running the following script: ```bash uv run task generate-schema-wrapper ``` This script does a couple things: - downloads the appropriate schema files from the specified vega-lite release versions & copies the JSON file to the appropriate ``schema`` directory - generates basic low-level schemapi wrappers from the definitions within the schema: this is put in the ``schema/core.py`` file - generates a second layer of higher level wrappers for some vega-lite functionality; this is put in ``schema/channels.py`` and ``schema/mixins.py`` The script output is designed to be deterministic; if the vega-lite version is not changed, then running the script should overwrite the schema wrappers with identical copies. ## Updating Vega versions All versions are maintained in [pyproject.toml](pyproject.toml). ### Python Packages Projects which publish a package to PyPI are listed with a version bound in one of the following tables: - [`project.dependencies`](https://packaging.python.org/en/latest/specifications/pyproject-toml/#dependencies-optional-dependencies): Published dependencies. 
- [`project.optional-dependencies`](https://packaging.python.org/en/latest/specifications/pyproject-toml/#dependencies-optional-dependencies): Published optional dependencies, or "extras". - [`dependency-groups`](https://peps.python.org/pep-0735/): Local dependencies for development. > [!NOTE] > All are currently declared in sub-tables of `project.optional-dependencies`. The lower version bounds defined here are reused for [altair/utils/_importers.py](altair/utils/_importers.py). #### `vl-convert` We need to ensure that [vl-convert](https://github.com/vega/vl-convert) includes support for the new Vega-Lite version. Check the [vl-convert releases](https://github.com/vega/vl-convert/releases) to find the minimum version of `vl-convert` that includes support for the desired version of Vega-Lite (and [open an issue](https://github.com/vega/vl-convert/issues) if this version hasn't been included in a release yet). ### Javascript/other Additional version constraints, including for [`Vega-Lite`](https://github.com/vega/vega-lite) itself, are declared in `[tool.altair.vega]`. Whereas the [previous dependencies](#python-packages) are used primarily at *install-time*, this group is embedded into `altair` for use at *runtime* or when [generating the python code](#auto-generating-the-python-code): ```toml [tool.altair.vega] vega-datasets = "..." # https://github.com/vega/vega-datasets vega-embed = "..." # https://github.com/vega/vega-embed vega-lite = "..."
# https://github.com/vega/vega-lite ``` Some examples of where these propagate to: - [altair/jupyter/js/index.js](altair/jupyter/js/index.js) - [altair/utils/_importers.py](altair/utils/_importers.py) - [tools/generate_schema_wrapper.py](tools/generate_schema_wrapper.py) - [tools/versioning.py](tools/versioning.py) - [altair/utils/schemapi.py](https://github.com/vega/altair/blob/0e23fd33e9a755bab0ef73a856340c48c14897e6/altair/utils/schemapi.py#L1619-L1640) > [!IMPORTANT] > When updating **any** of these versions, be sure to [re-generate the python code](#auto-generating-the-python-code). #### Updating the Vega-Lite version The Vega-Lite version for the Python code propagates to `tools.generate_schema_wrapper.SCHEMA_VERSION`. This will update all of the automatically-generated files in the ``schema`` directory for each version, but please note that it will *not* update other pieces (for example, the core of the Altair API, including methods and doc strings within ``altair/vegalite/v6/api.py``). These additional methods have fairly good test coverage, so running the test suite should identify any inconsistencies: ```bash uv run task test ``` Generally, minor version updates (e.g. Vega-Lite 2.3->2.4) have been relatively painless, maybe requiring the addition of a few chart methods or modification of some docstrings. Major version updates (e.g. Vega-Lite 1.X->2.X) have required substantial rewrites, because the internal structure of the schema changed appreciably. ## Releasing the Package To cut a new release of Altair, follow the steps outlined in [RELEASING.md](RELEASING.md). ## Web analytics We use the privacy-friendly [plausible.io](https://plausible.io/) for tracking usage statistics of our documentation. It is hosted on [https://views.scientific-python.org](https://views.scientific-python.org). You can view the stats [here](https://views.scientific-python.org/altair-viz.github.io). 
To get an account to edit the settings of the web tracking, ask another maintainer. ================================================ FILE: README.md ================================================ # Vega-Altair [![github actions](https://github.com/vega/altair/workflows/build/badge.svg)](https://github.com/vega/altair/actions?query=workflow%3Abuild) [![typedlib_mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://www.mypy-lang.org) [![JOSS Paper](https://joss.theoj.org/papers/10.21105/joss.01057/status.svg)](https://joss.theoj.org/papers/10.21105/joss.01057) [![PyPI - Downloads](https://img.shields.io/pypi/dm/altair)](https://pypi.org/project/altair) **Vega-Altair** is a declarative statistical visualization library for Python. With Vega-Altair, you can spend more time understanding your data and its meaning. Vega-Altair's API is simple, friendly and consistent and built on top of the powerful [Vega-Lite](https://github.com/vega/vega-lite) JSON specification. This elegant simplicity produces beautiful and effective visualizations with a minimal amount of code. *Vega-Altair was originally developed by [Jake Vanderplas](https://github.com/jakevdp) and [Brian Granger](https://github.com/ellisonbg) in close collaboration with the [UW Interactive Data Lab](https://idl.cs.washington.edu/).* *The Vega-Altair open source project is not affiliated with Altair Engineering, Inc.* ## Documentation See [Vega-Altair's Documentation Site](https://altair-viz.github.io) as well as the [Tutorial Notebooks](https://github.com/altair-viz/altair_notebooks). 
You can run the notebooks directly in your browser by clicking on one of the following badges: [![Binder](https://beta.mybinder.org/badge.svg)](https://beta.mybinder.org/v2/gh/altair-viz/altair_notebooks/master) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/altair-viz/altair_notebooks/blob/master/notebooks/Index.ipynb) ## Example Here is an example using Vega-Altair to quickly visualize and display a dataset with the native Vega-Lite renderer in the JupyterLab: ```python import altair as alt # load a simple dataset as a pandas DataFrame from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color='Origin', ) ``` ![Vega-Altair Visualization](https://raw.githubusercontent.com/altair-viz/altair/main/images/cars.png) One of the unique features of Vega-Altair, inherited from Vega-Lite, is a declarative grammar of not just visualization, but _interaction_. With a few modifications to the example above we can create a linked histogram that is filtered based on a selection of the scatter plot. ```python import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_interval() points = alt.Chart(source).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color=alt.when(brush).then("Origin").otherwise(alt.value("lightgray")) ).add_params( brush ) bars = alt.Chart(source).mark_bar().encode( y='Origin', color='Origin', x='count(Origin)' ).transform_filter( brush ) points & bars ``` ![Vega-Altair Visualization Gif](https://raw.githubusercontent.com/altair-viz/altair/main/images/cars_scatter_bar.gif) ## Features * Carefully-designed, declarative Python API. * Auto-generated internal Python API that guarantees visualizations are type-checked and in full conformance with the [Vega-Lite](https://github.com/vega/vega-lite) specification. 
* Display visualizations in JupyterLab, Jupyter Notebook, Visual Studio Code, on GitHub and [nbviewer](https://nbviewer.jupyter.org/), and many more. * Export visualizations to various formats such as PNG/SVG images, stand-alone HTML pages and the [Online Vega-Lite Editor](https://vega.github.io/editor/#/). * Serialize visualizations as JSON files. ## Installation Vega-Altair can be installed with: ```bash pip install altair ``` If you are using the conda package manager, the equivalent is: ```bash conda install altair -c conda-forge ``` For full installation instructions, please see [the documentation](https://altair-viz.github.io/getting_started/installation.html). ## Getting Help If you have a question that is not addressed in the documentation, you can post it on [StackOverflow](https://stackoverflow.com/questions/tagged/altair) using the `altair` tag. For bugs and feature requests, please open a [Github Issue](https://github.com/vega/altair/issues). ## Development [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv) [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![pytest](https://img.shields.io/badge/logo-pytest-blue?logo=pytest&labelColor=5c5c5c&label=%20)](https://github.com/pytest-dev/pytest) For information on how to contribute your developments back to the Vega-Altair repository, see [`CONTRIBUTING.md`](https://github.com/vega/altair/blob/main/CONTRIBUTING.md) ## Citing Vega-Altair [![JOSS Paper](https://joss.theoj.org/papers/10.21105/joss.01057/status.svg)](https://joss.theoj.org/papers/10.21105/joss.01057) If you use Vega-Altair in academic work, please consider citing https://joss.theoj.org/papers/10.21105/joss.01057 as ```bib @article{VanderPlas2018, doi = {10.21105/joss.01057}, url = {https://doi.org/10.21105/joss.01057}, year = {2018}, 
publisher = {The Open Journal}, volume = {3}, number = {32}, pages = {1057}, author = {Jacob VanderPlas and Brian Granger and Jeffrey Heer and Dominik Moritz and Kanit Wongsuphasawat and Arvind Satyanarayan and Eitan Lees and Ilia Timofeev and Ben Welsh and Scott Sievert}, title = {Altair: Interactive Statistical Visualizations for Python}, journal = {Journal of Open Source Software} } ``` Please additionally consider citing the [Vega-Lite](https://vega.github.io/vega-lite/) project, which Vega-Altair is based on: https://dl.acm.org/doi/10.1109/TVCG.2016.2599030 ```bib @article{Satyanarayan2017, author={Satyanarayan, Arvind and Moritz, Dominik and Wongsuphasawat, Kanit and Heer, Jeffrey}, title={Vega-Lite: A Grammar of Interactive Graphics}, journal={IEEE transactions on visualization and computer graphics}, year={2017}, volume={23}, number={1}, pages={341-350}, publisher={IEEE} } ``` ================================================ FILE: RELEASING.md ================================================ 1. Check all [Vega project](https://github.com/orgs/vega/repositories?type=source) versions are up-to-date. See [NOTES_FOR_MAINTAINERS.md](NOTES_FOR_MAINTAINERS.md) 2. Make sure to have [set up your environment](CONTRIBUTING.md#setting-up-your-environment). Update your environment with the latest dependencies: uv sync --all-extras 3. Make certain your branch is in sync with head, and that you have no uncommitted modifications. If you work on a fork, replace `origin` with `upstream`: git checkout main git pull origin main git status # Should show "nothing to commit, working tree clean" 4. Do a [clean doc build](CONTRIBUTING.md#building-the-documentation-locally): Navigate to http://localhost:8000 and ensure it looks OK (particularly do a visual scan of the gallery thumbnails). 5. Create a new release branch: git switch -c version_6.0.0 6. Update version to, e.g. 6.0.0: - in ``altair/__init__.py`` - in ``doc/conf.py`` 7. Commit changes and push: git add . 
-u git commit -m "chore: Bump version to 6.0.0" git push 8. Merge release branch into main, make sure that all required checks pass 9. Switch to main, If you work on a fork, replace `origin` with `upstream`: git switch main git pull origin main 10. Build a source distribution and universal wheel, publish to PyPI (Requires correct PyPI owner permissions and [UV_PUBLISH_TOKEN](https://docs.astral.sh/uv/configuration/environment/#uv_publish_token)): uv run task publish 11. Build and publish docs (Requires write-access to [altair-viz/altair-viz.github.io](https://github.com/altair-viz/altair-viz.github.io)): uv run task doc-publish-clean-build 12. On main, tag the release. If you work on a fork, replace `origin` with `upstream`: git tag -a v6.0.0 -m "Version 6.0.0 release" git push origin v6.0.0 13. Create a new branch: git switch -c maint_6.1.0dev 14. Update version and add 'dev' suffix, e.g. 6.1.0dev: - in ``altair/__init__.py`` - in ``doc/conf.py`` 15. Commit changes and push: git add . -u git commit -m "chore: Bump version to 6.1.0dev" git push 16. Merge maintenance branch into main 17. Double-check that a conda-forge pull request is generated from the updated pip package by the conda-forge bot (may take up to several hours): https://github.com/conda-forge/altair-feedstock/pulls 18. Publish a new release in https://github.com/vega/altair/releases/ ================================================ FILE: altair/__init__.py ================================================ # ruff: noqa __version__ = "6.1.0dev" # The content of __all__ is automatically written by # tools/update_init_file.py. Do not modify directly. 
__all__ = [ "Aggregate", "AggregateOp", "AggregateTransform", "AggregatedFieldDef", "Align", "AllSortString", "AltairDeprecationWarning", "Angle", "AngleDatum", "AngleValue", "AnyMark", "AnyMarkConfig", "AreaConfig", "ArgmaxDef", "ArgminDef", "AutoSizeParams", "AutosizeType", "Axis", "AxisConfig", "AxisOrient", "AxisResolveMap", "BBox", "BarConfig", "BaseTitleNoValueRefs", "Baseline", "Bin", "BinExtent", "BinParams", "BinTransform", "BindCheckbox", "BindDirect", "BindInput", "BindRadioSelect", "BindRange", "Binding", "BinnedTimeUnit", "Blend", "BoxPlot", "BoxPlotConfig", "BoxPlotDef", "BrushConfig", "CalculateTransform", "Categorical", "ChainedWhen", "Chart", "ChartDataType", "Color", "ColorDatum", "ColorDef", "ColorName", "ColorScheme", "ColorValue", "Column", "CompositeMark", "CompositeMarkDef", "CompositionConfig", "ConcatChart", "ConcatSpecGenericSpec", "ConditionalAxisColor", "ConditionalAxisLabelAlign", "ConditionalAxisLabelBaseline", "ConditionalAxisLabelFontStyle", "ConditionalAxisLabelFontWeight", "ConditionalAxisNumber", "ConditionalAxisNumberArray", "ConditionalAxisPropertyAlignnull", "ConditionalAxisPropertyColornull", "ConditionalAxisPropertyFontStylenull", "ConditionalAxisPropertyFontWeightnull", "ConditionalAxisPropertyTextBaselinenull", "ConditionalAxisPropertynumberArraynull", "ConditionalAxisPropertynumbernull", "ConditionalAxisPropertystringnull", "ConditionalAxisString", "ConditionalMarkPropFieldOrDatumDef", "ConditionalMarkPropFieldOrDatumDefTypeForShape", "ConditionalParameterMarkPropFieldOrDatumDef", "ConditionalParameterMarkPropFieldOrDatumDefTypeForShape", "ConditionalParameterStringFieldDef", "ConditionalParameterValueDefGradientstringnullExprRef", "ConditionalParameterValueDefTextExprRef", "ConditionalParameterValueDefnumber", "ConditionalParameterValueDefnumberArrayExprRef", "ConditionalParameterValueDefnumberExprRef", "ConditionalParameterValueDefstringExprRef", "ConditionalParameterValueDefstringnullExprRef", 
"ConditionalPredicateMarkPropFieldOrDatumDef", "ConditionalPredicateMarkPropFieldOrDatumDefTypeForShape", "ConditionalPredicateStringFieldDef", "ConditionalPredicateValueDefAlignnullExprRef", "ConditionalPredicateValueDefColornullExprRef", "ConditionalPredicateValueDefFontStylenullExprRef", "ConditionalPredicateValueDefFontWeightnullExprRef", "ConditionalPredicateValueDefGradientstringnullExprRef", "ConditionalPredicateValueDefTextBaselinenullExprRef", "ConditionalPredicateValueDefTextExprRef", "ConditionalPredicateValueDefnumber", "ConditionalPredicateValueDefnumberArrayExprRef", "ConditionalPredicateValueDefnumberArraynullExprRef", "ConditionalPredicateValueDefnumberExprRef", "ConditionalPredicateValueDefnumbernullExprRef", "ConditionalPredicateValueDefstringExprRef", "ConditionalPredicateValueDefstringnullExprRef", "ConditionalStringFieldDef", "ConditionalValueDefGradientstringnullExprRef", "ConditionalValueDefTextExprRef", "ConditionalValueDefnumber", "ConditionalValueDefnumberArrayExprRef", "ConditionalValueDefnumberExprRef", "ConditionalValueDefstringExprRef", "ConditionalValueDefstringnullExprRef", "Config", "CsvDataFormat", "Cursor", "Cyclical", "Data", "DataFormat", "DataSource", "DataType", "Datasets", "DateTime", "DatumChannelMixin", "DatumDef", "Day", "DensityTransform", "DerivedStream", "Description", "DescriptionValue", "Detail", "Dict", "DictInlineDataset", "DictSelectionInit", "DictSelectionInitInterval", "Diverging", "DomainUnionWith", "DsvDataFormat", "Element", "Encoding", "EncodingSortField", "ErrorBand", "ErrorBandConfig", "ErrorBandDef", "ErrorBar", "ErrorBarConfig", "ErrorBarDef", "ErrorBarExtent", "EventStream", "EventType", "Expr", "ExprRef", "ExtentTransform", "Facet", "FacetChart", "FacetEncodingFieldDef", "FacetFieldDef", "FacetMapping", "FacetSpec", "FacetedEncoding", "FacetedUnitSpec", "Feature", "FeatureCollection", "FeatureGeometryGeoJsonProperties", "Field", "FieldChannelMixin", "FieldDefWithoutScale", "FieldEqualPredicate", 
"FieldGTEPredicate", "FieldGTPredicate", "FieldLTEPredicate", "FieldLTPredicate", "FieldName", "FieldOneOfPredicate", "FieldOrDatumDefWithConditionDatumDefGradientstringnull", "FieldOrDatumDefWithConditionDatumDefnumber", "FieldOrDatumDefWithConditionDatumDefnumberArray", "FieldOrDatumDefWithConditionDatumDefstringnull", "FieldOrDatumDefWithConditionMarkPropFieldDefGradientstringnull", "FieldOrDatumDefWithConditionMarkPropFieldDefTypeForShapestringnull", "FieldOrDatumDefWithConditionMarkPropFieldDefnumber", "FieldOrDatumDefWithConditionMarkPropFieldDefnumberArray", "FieldOrDatumDefWithConditionStringDatumDefText", "FieldOrDatumDefWithConditionStringFieldDefText", "FieldOrDatumDefWithConditionStringFieldDefstring", "FieldRange", "FieldRangePredicate", "FieldValidPredicate", "Fill", "FillDatum", "FillOpacity", "FillOpacityDatum", "FillOpacityValue", "FillValue", "FilterTransform", "Fit", "FlattenTransform", "FoldTransform", "FontStyle", "FontWeight", "Format", "FormatConfig", "Generator", "GenericUnitSpecEncodingAnyMark", "GeoJsonFeature", "GeoJsonFeatureCollection", "GeoJsonProperties", "Geometry", "GeometryCollection", "Gradient", "GradientStop", "GraticuleGenerator", "GraticuleParams", "HConcatChart", "HConcatSpecGenericSpec", "Header", "HeaderConfig", "HexColor", "Href", "HrefValue", "Impute", "ImputeMethod", "ImputeParams", "ImputeSequence", "ImputeTransform", "InlineData", "InlineDataset", "Interpolate", "IntervalSelectionConfig", "IntervalSelectionConfigWithoutType", "JoinAggregateFieldDef", "JoinAggregateTransform", "JsonDataFormat", "JupyterChart", "Key", "LabelOverlap", "LatLongDef", "LatLongFieldDef", "Latitude", "Latitude2", "Latitude2Datum", "Latitude2Value", "LatitudeDatum", "LayerChart", "LayerRepeatMapping", "LayerRepeatSpec", "LayerSpec", "LayoutAlign", "Legend", "LegendBinding", "LegendConfig", "LegendOrient", "LegendResolveMap", "LegendStreamBinding", "LineConfig", "LineString", "LinearGradient", "LocalMultiTimeUnit", "LocalSingleTimeUnit", 
"Locale", "LoessTransform", "LogicalAndPredicate", "LogicalNotPredicate", "LogicalOrPredicate", "Longitude", "Longitude2", "Longitude2Datum", "Longitude2Value", "LongitudeDatum", "LookupData", "LookupSelection", "LookupTransform", "Mark", "MarkConfig", "MarkDef", "MarkInvalidDataMode", "MarkPropDefGradientstringnull", "MarkPropDefnumber", "MarkPropDefnumberArray", "MarkPropDefstringnullTypeForShape", "MarkType", "MaxRowsError", "MergedStream", "Month", "MultiLineString", "MultiPoint", "MultiPolygon", "MultiTimeUnit", "NamedData", "NonArgAggregateOp", "NonLayerRepeatSpec", "NonNormalizedSpec", "NumberLocale", "NumericArrayMarkPropDef", "NumericMarkPropDef", "OffsetDef", "Opacity", "OpacityDatum", "OpacityValue", "Order", "OrderFieldDef", "OrderOnlyDef", "OrderValue", "OrderValueDef", "Orient", "Orientation", "OverlayMarkDef", "Padding", "Parameter", "ParameterExpression", "ParameterExtent", "ParameterName", "ParameterPredicate", "Parse", "ParseValue", "PivotTransform", "Point", "PointSelectionConfig", "PointSelectionConfigWithoutType", "PolarDef", "Polygon", "Position", "Position2Def", "PositionDatumDef", "PositionDatumDefBase", "PositionDef", "PositionFieldDef", "PositionFieldDefBase", "PositionValueDef", "Predicate", "PredicateComposition", "PrimitiveValue", "Projection", "ProjectionConfig", "ProjectionType", "QuantileTransform", "RadialGradient", "Radius", "Radius2", "Radius2Datum", "Radius2Value", "RadiusDatum", "RadiusValue", "RangeConfig", "RangeEnum", "RangeRaw", "RangeRawArray", "RangeScheme", "RectConfig", "RegressionTransform", "RelativeBandSize", "RepeatChart", "RepeatMapping", "RepeatRef", "RepeatSpec", "Resolve", "ResolveMode", "Root", "Row", "RowColLayoutAlign", "RowColboolean", "RowColnumber", "RowColumnEncodingFieldDef", "SCHEMA_URL", "SCHEMA_VERSION", "SampleTransform", "Scale", "ScaleBinParams", "ScaleBins", "ScaleConfig", "ScaleDatumDef", "ScaleFieldDef", "ScaleInterpolateEnum", "ScaleInterpolateParams", "ScaleInvalidDataConfig", 
"ScaleInvalidDataShowAsValueangle", "ScaleInvalidDataShowAsValuecolor", "ScaleInvalidDataShowAsValuefill", "ScaleInvalidDataShowAsValuefillOpacity", "ScaleInvalidDataShowAsValueopacity", "ScaleInvalidDataShowAsValueradius", "ScaleInvalidDataShowAsValueshape", "ScaleInvalidDataShowAsValuesize", "ScaleInvalidDataShowAsValuestroke", "ScaleInvalidDataShowAsValuestrokeDash", "ScaleInvalidDataShowAsValuestrokeOpacity", "ScaleInvalidDataShowAsValuestrokeWidth", "ScaleInvalidDataShowAsValuetheta", "ScaleInvalidDataShowAsValuetime", "ScaleInvalidDataShowAsValuex", "ScaleInvalidDataShowAsValuexOffset", "ScaleInvalidDataShowAsValuey", "ScaleInvalidDataShowAsValueyOffset", "ScaleInvalidDataShowAsangle", "ScaleInvalidDataShowAscolor", "ScaleInvalidDataShowAsfill", "ScaleInvalidDataShowAsfillOpacity", "ScaleInvalidDataShowAsopacity", "ScaleInvalidDataShowAsradius", "ScaleInvalidDataShowAsshape", "ScaleInvalidDataShowAssize", "ScaleInvalidDataShowAsstroke", "ScaleInvalidDataShowAsstrokeDash", "ScaleInvalidDataShowAsstrokeOpacity", "ScaleInvalidDataShowAsstrokeWidth", "ScaleInvalidDataShowAstheta", "ScaleInvalidDataShowAstime", "ScaleInvalidDataShowAsx", "ScaleInvalidDataShowAsxOffset", "ScaleInvalidDataShowAsy", "ScaleInvalidDataShowAsyOffset", "ScaleResolveMap", "ScaleType", "SchemaBase", "SchemeParams", "SecondaryFieldDef", "SelectionConfig", "SelectionExpression", "SelectionInit", "SelectionInitInterval", "SelectionInitIntervalMapping", "SelectionInitMapping", "SelectionParameter", "SelectionPredicateComposition", "SelectionResolution", "SelectionType", "SequenceGenerator", "SequenceParams", "SequentialMultiHue", "SequentialSingleHue", "Shape", "ShapeDatum", "ShapeDef", "ShapeValue", "SharedEncoding", "SingleDefUnitChannel", "SingleTimeUnit", "Size", "SizeDatum", "SizeValue", "Sort", "SortArray", "SortByChannel", "SortByChannelDesc", "SortByEncoding", "SortField", "SortOrder", "Spec", "SphereGenerator", "StackOffset", "StackTransform", "StandardType", "Step", "StepFor", 
"Stream", "StringFieldDef", "StringFieldDefWithCondition", "StringValueDefWithCondition", "Stroke", "StrokeCap", "StrokeDash", "StrokeDashDatum", "StrokeDashValue", "StrokeDatum", "StrokeJoin", "StrokeOpacity", "StrokeOpacityDatum", "StrokeOpacityValue", "StrokeValue", "StrokeWidth", "StrokeWidthDatum", "StrokeWidthValue", "StyleConfigIndex", "SymbolShape", "TOPLEVEL_ONLY_KEYS", "Text", "TextBaseline", "TextDatum", "TextDef", "TextDirection", "TextValue", "Then", "Theta", "Theta2", "Theta2Datum", "Theta2Value", "ThetaDatum", "ThetaValue", "TickConfig", "TickCount", "Time", "TimeDef", "TimeFieldDef", "TimeFormatSpecifier", "TimeInterval", "TimeIntervalStep", "TimeLocale", "TimeUnit", "TimeUnitParams", "TimeUnitTransform", "TimeUnitTransformParams", "Title", "TitleAnchor", "TitleConfig", "TitleFrame", "TitleOrient", "TitleParams", "Tooltip", "TooltipContent", "TooltipValue", "TopLevelConcatSpec", "TopLevelFacetSpec", "TopLevelHConcatSpec", "TopLevelLayerSpec", "TopLevelMixin", "TopLevelParameter", "TopLevelRepeatSpec", "TopLevelSelectionParameter", "TopLevelSpec", "TopLevelUnitSpec", "TopLevelVConcatSpec", "TopoDataFormat", "Transform", "Type", "TypeForShape", "TypedFieldDef", "URI", "Undefined", "UnitSpec", "UnitSpecWithFrame", "Url", "UrlData", "UrlValue", "UtcMultiTimeUnit", "UtcSingleTimeUnit", "VConcatChart", "VConcatSpecGenericSpec", "VEGAEMBED_VERSION", "VEGALITE_VERSION", "VEGA_VERSION", "ValueChannelMixin", "ValueDefWithConditionMarkPropFieldOrDatumDefGradientstringnull", "ValueDefWithConditionMarkPropFieldOrDatumDefTypeForShapestringnull", "ValueDefWithConditionMarkPropFieldOrDatumDefnumber", "ValueDefWithConditionMarkPropFieldOrDatumDefnumberArray", "ValueDefWithConditionMarkPropFieldOrDatumDefstringnull", "ValueDefWithConditionStringFieldDefText", "ValueDefnumber", "ValueDefnumberwidthheightExprRef", "VariableParameter", "Vector10string", "Vector12string", "Vector2DateTime", "Vector2Vector2number", "Vector2boolean", "Vector2number", "Vector2string", 
"Vector3number", "Vector7string", "VegaLite", "VegaLiteSchema", "ViewBackground", "ViewConfig", "When", "WindowEventType", "WindowFieldDef", "WindowOnlyOp", "WindowTransform", "X", "X2", "X2Datum", "X2Value", "XDatum", "XError", "XError2", "XError2Value", "XErrorValue", "XOffset", "XOffsetDatum", "XOffsetValue", "XValue", "Y", "Y2", "Y2Datum", "Y2Value", "YDatum", "YError", "YError2", "YError2Value", "YErrorValue", "YOffset", "YOffsetDatum", "YOffsetValue", "YValue", "api", "binding", "binding_checkbox", "binding_radio", "binding_range", "binding_select", "channels", "check_fields_and_encodings", "compiler", "concat", "condition", "core", "data", "data_transformers", "datasets", "datum", "default_data_transformer", "display", "expr", "graticule", "hconcat", "jupyter", "layer", "limit_rows", "load_ipython_extension", "load_schema", "mixins", "param", "parse_shorthand", "renderers", "repeat", "sample", "schema", "selection_interval", "selection_point", "sequence", "sphere", "theme", "to_csv", "to_json", "to_values", "topo_feature", "typing", "utils", "v6", "value", "vconcat", "vegalite", "vegalite_compilers", "when", "with_property_setters", ] def __dir__(): return __all__ from altair.vegalite import * from altair.vegalite.v6.schema.core import Dict from altair.jupyter import JupyterChart from altair.expr import expr from altair.utils import AltairDeprecationWarning, parse_shorthand, Undefined from altair import datasets, theme, typing def load_ipython_extension(ipython): from altair._magics import vegalite ipython.register_magic_function(vegalite, "cell") def __getattr__(name: str): from altair.utils.deprecation import deprecated_warn if name == "themes": deprecated_warn( "Most cases require only the following change:\n\n" " # Deprecated\n" " alt.themes.enable('quartz')\n\n" " # Updated\n" " alt.theme.enable('quartz')\n\n" "If your code registers a theme, make the following change:\n\n" " # Deprecated\n" " def custom_theme():\n" " return {'height': 400, 'width': 
700}\n" " alt.themes.register('theme_name', custom_theme)\n" " alt.themes.enable('theme_name')\n\n" " # Updated\n" " @alt.theme.register('theme_name', enable=True)\n" " def custom_theme():\n" " return alt.theme.ThemeConfig(\n" " {'height': 400, 'width': 700}\n" " )\n\n" "See the updated User Guide for further details:\n" " https://altair-viz.github.io/user_guide/api.html#theme\n" " https://altair-viz.github.io/user_guide/customization.html#chart-themes", version="5.5.0", alternative="altair.theme", stacklevel=3, action="once", ) return theme._themes else: msg = f"module {__name__!r} has no attribute {name!r}" raise AttributeError(msg) ================================================ FILE: altair/_magics.py ================================================ """Magic functions for rendering vega-lite specifications.""" from __future__ import annotations import json import warnings from importlib.util import find_spec from typing import Any from IPython.core import magic_arguments from narwhals.stable.v1.dependencies import is_pandas_dataframe from altair.vegalite import v6 as vegalite_v6 __all__ = ["vegalite"] RENDERERS = { "vega-lite": { "6": vegalite_v6.VegaLite, }, } TRANSFORMERS = { "vega-lite": { "6": vegalite_v6.data_transformers, }, } def _prepare_data(data, data_transformers): """Convert input data to data for use within schema.""" if data is None or isinstance(data, dict): return data elif is_pandas_dataframe(data): if func := data_transformers.get(): data = func(data) return data elif isinstance(data, str): return {"url": data} else: warnings.warn(f"data of type {type(data)} not recognized", stacklevel=1) return data def _get_variable(name: str) -> Any: """Get a variable from the notebook namespace.""" from IPython.core.getipython import get_ipython if ip := get_ipython(): if name not in ip.user_ns: msg = f"argument '{name}' does not match the name of any defined variable" raise NameError(msg) return ip.user_ns[name] else: msg = ( "Magic command must be run 
@magic_arguments.magic_arguments()
@magic_arguments.argument(
    "data",
    nargs="?",
    help="local variablename of a pandas DataFrame to be used as the dataset",
)
@magic_arguments.argument("-v", "--version", dest="version", default="v6")
@magic_arguments.argument("-j", "--json", dest="json", action="store_true")
def vegalite(line, cell) -> vegalite_v6.VegaLite:
    """
    Cell magic for displaying vega-lite visualizations in CoLab.

    %%vegalite [dataframe] [--json] [--version='v6']

    Visualize the contents of the cell using Vega-Lite, optionally
    specifying a pandas DataFrame object to be used as the dataset.

    if --json is passed, then input is parsed as json rather than yaml.
    """
    options = magic_arguments.parse_argstring(vegalite, line)

    # Translate the user-facing version flag into the registry key.
    known_versions = {"v6": "6"}
    key = known_versions[options.version]
    assert key in RENDERERS["vega-lite"]
    renderer = RENDERERS["vega-lite"][key]
    transformers = TRANSFORMERS["vega-lite"][key]

    if options.json:
        # JSON was requested explicitly.
        spec = json.loads(cell)
    elif find_spec("yaml"):
        # pyyaml is importable: parse the cell as YAML with the safe loader.
        import yaml

        spec = yaml.safe_load(cell)
    else:
        # No pyyaml available: fall back to JSON and explain how to get YAML.
        try:
            spec = json.loads(cell)
        except json.JSONDecodeError as err:
            msg = (
                "%%vegalite: spec is not valid JSON. "
                "Install pyyaml to parse spec as yaml"
            )
            raise ValueError(msg) from err

    if options.data is not None:
        # The positional argument names a variable in the notebook namespace.
        frame = _get_variable(options.data)
        spec["data"] = _prepare_data(frame, transformers)

    return renderer(spec)
Examples -------- **Primary Interface - Data Object**:: from altair.datasets import data # Load with default engine (pandas) cars_df = data.cars() # Load with specific engine cars_polars = data.cars(engine="polars") cars_pyarrow = data.cars(engine="pyarrow") # Get URL cars_url = data.cars.url # Set default engine for all datasets data.set_default_engine("polars") movies_df = data.movies() # Uses polars engine # List available datasets available_datasets = data.list_datasets() **Expert Interface - Loader**:: from altair.datasets import Loader load = Loader.from_backend("polars") load("penguins") load.url("penguins") This method also provides *precise* Tab completions on the returned object:: load("cars"). # bottom_k # drop # drop_in_place # drop_nans # dtypes # ... **Expert Interface - Direct Functions**:: from altair.datasets import load, url # Load a dataset cars_df = load("cars", backend="polars") # Get dataset URL cars_url = url("cars") .. note:: Requires installation of either `polars`_, `pandas`_, or `pyarrow`_. .. _vega-datasets: https://github.com/vega/vega-datasets .. _Example Gallery: https://altair-viz.github.io/gallery/index.html#example-gallery .. _datapackage.md: https://github.com/vega/vega-datasets/blob/main/datapackage.md .. _polars: https://docs.pola.rs/user-guide/installation/ .. _pandas: https://pandas.pydata.org/docs/getting_started/install.html .. _pyarrow: https://arrow.apache.org/docs/python/install.html """ from __future__ import annotations from typing import TYPE_CHECKING from altair.datasets._loader import Loader if TYPE_CHECKING: import sys from typing import Any if sys.version_info >= (3, 11): from typing import LiteralString else: from typing_extensions import LiteralString from altair.datasets._data import DataObject from altair.datasets._loader import _Load from altair.datasets._typing import Dataset, Extension __all__ = ["Loader", "data", "load", "url"] load: _Load[Any, Any] """ Get a remote dataset and load as tabular data. 
def url(
    name: Dataset | LiteralString,
    suffix: Extension | None = None,
    /,
) -> str:
    """
    Return the address of a remote dataset.

    This is an expert interface. For most users, the data object interface is recommended::

        from altair.datasets import data

        cars_url = data.cars.url

    Parameters
    ----------
    name
        Name of the dataset/`Path.stem`_.
    suffix
        File extension/`Path.suffix`_.

        .. note::
            Only needed if ``name`` is available in multiple formats.

    Returns
    -------
    ``str``

    Notes
    -----
    The lookup is first attempted through ``load.url(name, suffix)``.
    If that raises ``AltairDatasetsError``, the address is taken from
    ``csv_cache.url(name)`` instead (``suffix`` is not used on that path).

    .. _Path.stem:
        https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
    .. _Path.suffix:
        https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
    """
    from altair.datasets._exceptions import AltairDatasetsError

    try:
        # Importing the loader is part of the guarded region on purpose:
        # a failure raised while importing is handled like a failed lookup.
        from altair.datasets._loader import load

        return load.url(name, suffix)
    except AltairDatasetsError:
        from altair.datasets._cache import csv_cache

        return csv_cache.url(name)
"DatasetCache", "SchemaCache", "csv_cache"] _KT = TypeVar("_KT") _VT = TypeVar("_VT") _T = TypeVar("_T") _METADATA_DIR: Final[Path] = Path(__file__).parent / "_metadata" _DTYPE_TO_FIELD: Mapping[type[DType], FlFieldStr] = { nw.Int64: "integer", nw.Float64: "number", nw.Boolean: "boolean", nw.String: "string", nw.Struct: "object", nw.List: "array", nw.Date: "date", nw.Datetime: "datetime", nw.Duration: "duration", # nw.Time: "time" (Not Implemented, but we don't have any cases using it anyway) } """ Similar to `pl.datatypes.convert.dtype_to_ffiname`_. But using `narwhals.dtypes`_ to the string repr of ``frictionless`` `Field Types`_. .. _pl.datatypes.convert.dtype_to_ffiname: https://github.com/pola-rs/polars/blob/85d078c066860e012f5e7e611558e6382b811b82/py-polars/polars/datatypes/convert.py#L139-L165 .. _Field Types: https://datapackage.org/standard/table-schema/#field-types .. _narwhals.dtypes: https://narwhals-dev.github.io/narwhals/api-reference/dtypes/ """ _FIELD_TO_DTYPE: Mapping[FlFieldStr, type[DType]] = { v: k for k, v in _DTYPE_TO_FIELD.items() } def _iter_metadata(df: nw.DataFrame[Any], /) -> Iterator[Metadata]: """ Yield rows from ``df``, where each represents a dataset. See Also -------- ``altair.datasets._typing.Metadata`` """ yield from cast("Iterator[Metadata]", df.iter_rows(named=True)) class CompressedCache(Protocol[_KT, _VT]): fp: Path _mapping: MutableMapping[_KT, _VT] def read(self) -> Any: ... def __getitem__(self, key: _KT, /) -> _VT: ... def __enter__(self) -> IOBase: import gzip return gzip.open(self.fp, mode="rb").__enter__() def __exit__(self, *args) -> None: return def get(self, key: _KT, default: _T, /) -> _VT | _T: return self.mapping.get(key, default) @property def mapping(self) -> MutableMapping[_KT, _VT]: if not self._mapping: self._mapping.update(self.read()) return self._mapping class CsvCache(CompressedCache["_Dataset", "Metadata"]): """ `csv`_, `gzip`_ -based, lazy metadata lookup. Used as a fallback for 2 scenarios: 1. 
class SchemaCache(CompressedCache["_Dataset", "_FlSchema"]):
    """
    `json`_, `gzip`_ -based, lazy schema lookup.

    - Primarily benefits ``pandas``, which needs some help identifying **temporal** columns.
    - Utilizes `data package`_ schema types.
    - All methods return falsy containers instead of exceptions

    .. _json:
        https://docs.python.org/3/library/json.html
    .. _gzip:
        https://docs.python.org/3/library/gzip.html
    .. _data package:
        https://github.com/vega/vega-datasets/pull/631
    """

    fp = _METADATA_DIR / "schemas.json.gz"

    def __init__(
        self,
        *,
        tp: type[MutableMapping[_Dataset, _FlSchema]] = dict["_Dataset", "_FlSchema"],
        implementation: nw.Implementation = nw.Implementation.UNKNOWN,
    ) -> None:
        """
        Create an empty, not-yet-loaded cache.

        Parameters
        ----------
        tp
            Mapping type instantiated to hold the schemas.
        implementation
            Backend the schemas will be used with; drives ``is_active`` and
            the keyword arguments produced by ``schema_kwds``.
        """
        self._mapping: MutableMapping[_Dataset, _FlSchema] = tp()
        self._implementation: nw.Implementation = implementation

    def read(self) -> Any:
        """Decode the gzip-compressed JSON document at ``fp``."""
        import json

        with self as f:
            return json.load(f)

    def __getitem__(self, key: _Dataset, /) -> _FlSchema:
        """Return the schema for ``key``, or an empty mapping when unknown."""
        return self.get(key, {})

    def by_dtype(self, name: _Dataset, *dtypes: type[DType]) -> list[str]:
        """
        Return column names specified in ``name``'s schema.

        Parameters
        ----------
        name
            Dataset name.
        *dtypes
            Optionally, only return columns matching the given data type(s).
        """
        if (match := self[name]) and dtypes:
            # Translate the requested dtypes into schema field-type strings.
            include = {_DTYPE_TO_FIELD[tp] for tp in dtypes}
            return [col for col, tp_str in match.items() if tp_str in include]
        else:
            return list(match)

    def is_active(self) -> bool:
        """Whether the configured backend is one that schemas are applied to."""
        # NOTE(review): the set previously listed PYARROW twice. The duplicate
        # is dropped here without changing the result; if a different backend
        # was intended in its place, it has to be added explicitly.
        return self._implementation in {
            nw.Implementation.PANDAS,
            nw.Implementation.PYARROW,
            nw.Implementation.MODIN,
        }

    def schema(self, name: _Dataset, /) -> nw.Schema:
        """Build a ``nw.Schema`` from the stored field-type strings of ``name``."""
        it = ((col, _FIELD_TO_DTYPE[tp_str]()) for col, tp_str in self[name].items())
        return nw.Schema(it)

    def schema_kwds(self, meta: Metadata, /) -> dict[str, Any]:
        """
        Return reader keyword arguments derived from the dataset's schema.

        An empty ``dict`` is returned when the cache is not active, no schema
        is stored for the dataset, or no branch below matches ``suffix``.
        """
        name: Any = meta["dataset_name"]
        if self.is_active() and (self[name]):
            suffix = meta["suffix"]
            if self._implementation.is_pandas_like():
                # Only temporal columns need to be pointed out here.
                if cols := self.by_dtype(name, nw.Date, nw.Datetime):
                    if suffix == ".json":
                        return {"convert_dates": cols}
                    elif suffix in {".csv", ".tsv"}:
                        return {"parse_dates": cols}
            else:
                schema = self.schema(name).to_arrow()
                if suffix in {".csv", ".tsv"}:
                    from pyarrow.csv import ConvertOptions

                    # For pyarrow CSV reading, use the schema as intended
                    # This will fail for non-ISO date formats, but that's the correct behavior
                    # Users can handle this by using a different backend or converting dates manually
                    return {"convert_options": ConvertOptions(column_types=schema)}
                elif suffix == ".parquet":
                    return {"schema": schema}

        return {}
class DatasetCache:
    """
    Opt-out caching of remote dataset requests.

    The cache directory is taken from the ``ALTAIR_DATASETS_DIR`` environment
    variable, falling back to ``<XDG cache home>/altair``. Setting the
    variable to an empty string disables caching.
    """

    _ENV_VAR: ClassVar[LiteralString] = "ALTAIR_DATASETS_DIR"
    # An *empty* ``XDG_CACHE_HOME`` is treated like an unset one. Using ``or``
    # instead of a ``dict.get`` default stops an empty value from resolving
    # the cache relative to the current working directory.
    _XDG_CACHE: ClassVar[Path] = (
        Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache") / "altair"
    ).resolve()

    def __init__(self, reader: _SupportsScanMetadata, /) -> None:
        # ``reader`` provides both the metadata query and the url opener.
        self._rd: _SupportsScanMetadata = reader

    def clear(self) -> None:
        """
        Delete all previously cached datasets.

        Only files whose name is ``<sha><suffix>`` for an entry in the
        metadata are removed; other files in the directory are left alone.

        Raises
        ------
        ValueError
            If caching is currently disabled.
        """
        self._ensure_active()
        if self.is_empty():
            return None
        ser = (
            self._rd._scan_metadata()
            .select("sha", "suffix")
            .unique("sha")
            .select(nw.concat_str("sha", "suffix").alias("sha_suffix"))
            .collect()
            .get_column("sha_suffix")
        )
        names = set[str](ser.to_list())
        # Snapshot the listing first, so files are never removed from a
        # directory that is still being iterated.
        for fp in tuple(self):
            if fp.name in names:
                fp.unlink()
        return None

    def download_all(self) -> None:
        """
        Download any missing datasets for latest version.

        Requires **30-50MB** of disk-space.

        Files already present in the cache directory are matched by their
        stem (the ``sha``) and skipped. Image datasets are excluded.
        """
        stems = tuple(fp.stem for fp in self)
        predicates = (~(nw.col("sha").is_in(stems)),) if stems else ()
        frame = (
            self._rd._scan_metadata(*predicates, is_image=False)
            .select("sha", "suffix", "url")
            .unique("sha")
            .collect()
        )
        if frame.is_empty():
            print("Already downloaded all datasets")
            return None
        print(f"Downloading {len(frame)} missing datasets...")
        for meta in _iter_metadata(frame):
            self._download_one(meta["url"], self.path_meta(meta))
        print("Finished downloads")
        return None

    def _maybe_download(self, meta: Metadata, /) -> Path:
        """Return the cached path for ``meta``, downloading when absent or empty."""
        fp = self.path_meta(meta)
        if fp.exists() and fp.stat().st_size:
            return fp
        return self._download_one(meta["url"], fp)

    def _download_one(self, url: str, fp: Path, /) -> Path:
        """
        Fetch ``url`` and store the response body at ``fp``.

        The body is read in full *before* anything is written. Previously the
        target was created with ``touch()`` first, so a failed read left an
        empty file behind, which ``download_all`` then treated as already
        downloaded (it only compares file stems).
        """
        with self._rd._opener.open(url) as f:
            payload = f.read()
        fp.write_bytes(payload)
        return fp

    @property
    def path(self) -> Path:
        """
        Returns path to datasets cache.

        Defaults to (`XDG_CACHE_HOME`_)::

            "$XDG_CACHE_HOME/altair/"

        But can be configured using the environment variable::

            "$ALTAIR_DATASETS_DIR"

        You can set this for the current session via::

            from pathlib import Path
            from altair.datasets import load

            load.cache.path = Path.home() / ".altair_cache"

        You can *later* disable caching via::

            load.cache.path = None

        The directory is created (including parents) on access.

        Raises
        ------
        ValueError
            If caching is currently disabled.

        .. _XDG_CACHE_HOME:
            https://specifications.freedesktop.org/basedir-spec/latest/#variables
        """
        self._ensure_active()
        fp = Path(usr) if (usr := os.environ.get(self._ENV_VAR)) else self._XDG_CACHE
        fp.mkdir(parents=True, exist_ok=True)
        return fp

    @path.setter
    def path(self, source: StrPath | None, /) -> None:
        # ``None`` disables caching by storing an empty string, which is the
        # exact value ``is_not_active`` checks for.
        if source is not None:
            os.environ[self._ENV_VAR] = str(Path(source).resolve())
        else:
            os.environ[self._ENV_VAR] = ""

    def path_meta(self, meta: Metadata, /) -> Path:
        """Return the cache location for ``meta``: ``<path>/<sha><suffix>``."""
        return self.path / (meta["sha"] + meta["suffix"])

    def __iter__(self) -> Iterator[Path]:
        yield from self.path.iterdir()

    def __repr__(self) -> str:
        name = type(self).__name__
        if self.is_not_active():
            return f"{name}"
        else:
            return f"{name}<{self.path.as_posix()!r}>"

    def is_active(self) -> bool:
        """Return ``True`` unless caching has been explicitly disabled."""
        return not self.is_not_active()

    def is_not_active(self) -> bool:
        """Return ``True`` when the environment variable is set to ``""``."""
        return os.environ.get(self._ENV_VAR) == ""

    def is_empty(self) -> bool:
        """Cache is active, but no files are stored in ``self.path``."""
        return next(iter(self), None) is None

    def _ensure_active(self) -> None:
        """Raise ``ValueError`` with setup instructions when caching is disabled."""
        if self.is_not_active():
            msg = (
                f"Cache is unset.\n"
                f"To enable dataset caching, set the environment variable:\n"
                f" {self._ENV_VAR!r}\n\n"
                f"You can set this for the current session via:\n"
                f" from pathlib import Path\n"
                f" from altair.datasets import load\n\n"
                f" load.cache.path = Path.home() / '.altair_cache'"
            )
            raise ValueError(msg)
class MetaIs(Set[tuple[str, Any]]):
    """
    Immutable, hashable set of ``(name, value)`` metadata requirements.

    Instances are callable: calling one with a collection of metadata items
    reports whether every requirement is contained in it.
    """

    _requires: frozenset[tuple[str, Any]]

    def __init__(self, kwds: frozenset[tuple[str, Any]], /) -> None:
        # Regular assignment is blocked by ``__setattr__`` below, so the
        # backing set has to be stored through ``object``.
        object.__setattr__(self, "_requires", kwds)

    @classmethod
    def from_metadata(cls, meta: Metadata, /) -> MetaIs:
        """Build a constraint from the items of a metadata mapping."""
        pairs = frozenset(meta.items())
        return cls(pairs)

    def to_metadata(self) -> Metadata:
        """Return the requirements as a plain ``dict``."""
        if not TYPE_CHECKING:
            return dict(self)

        def collect(**kwds: Unpack[Metadata]) -> Metadata:
            return kwds

        return collect(**dict(self))

    def to_expr(self) -> nw.Expr:
        """Convert constraint into a narwhals expression."""
        if len(self) == 0:
            msg = f"Unable to convert an empty set to an expression:\n\n{self!r}"
            raise TypeError(msg)
        return nw.all_horizontal(nw.col(name) == val for name, val in self)

    def isdisjoint(self, other: Iterable[Any]) -> bool:
        """Return ``True`` when no requirement appears in ``other``."""
        return super().isdisjoint(other)

    def issubset(self, other: Iterable[Any]) -> bool:
        """Return ``True`` when every requirement appears in ``other``."""
        required = self._requires
        return required.issubset(other)

    def __call__(self, meta: Items, /) -> bool:
        required = self._requires
        return required <= meta

    def __hash__(self) -> int:
        return hash(self._requires)

    def __contains__(self, x: object) -> bool:
        return x in self._requires

    def __iter__(self) -> Iterator[tuple[str, Any]]:
        for pair in self._requires:
            yield pair

    def __len__(self) -> int:
        return len(self._requires)

    def __setattr__(self, name: str, value: Any):
        # Every attribute assignment is rejected to keep instances immutable.
        msg = (
            f"{type(self).__name__!r} is immutable.\n"
            f"Could not assign self.{name} = {value}"
        )
        raise TypeError(msg)

    def __repr__(self) -> str:
        fields = dict(self)
        parts: list[str] = []
        if fields:
            # A truthy suffix is shown first, as a quoted glob-like pattern.
            suffix = fields.pop("suffix", None)
            if suffix:
                parts.append(f"'*{suffix}'")
            parts.extend(f"{k}={v!r}" for k, v in fields.items())
        contents = ", ".join(parts)
        return f"is_meta({contents})"
def _call_impl(
    self,
    *,
    engine: _Backend | None = None,
    **kwds: t.Any,
) -> t.Any:
    """
    Load this accessor's dataset, optionally with a one-off backend.

    Parameters
    ----------
    engine
        When truthy, a loader is created for this backend for the current
        call only. Otherwise the accessor's own loader is used.
    **kwds
        Forwarded unchanged to the loader call.

    Returns
    -------
    Any
        Whatever the selected loader returns for ``self._name``.
    """
    if engine:
        loader = Loader.from_backend(engine)
    else:
        loader = self._loader
    return loader(self._name, **kwds)
def load(self, *, engine: _Backend | None = None, **kwds: t.Any) -> t.Any:
    """
    Load the dataset, explicitly naming the engine to use.

    Equivalent to calling the accessor itself; both forward to the same
    internal implementation.

    Parameters
    ----------
    engine : {"polars", "pandas", "pandas[pyarrow]", "pyarrow"}, optional
        The backend to use for loading the dataset.
    **kwds : Any
        Additional arguments passed to the loader.

    Returns
    -------
    DataFrame or Table
        The loaded dataset.

    Examples
    --------
    >>> from altair.datasets import data
    >>> cars_df = data.cars.load(engine="polars")
    >>> movies_df = data.movies.load(engine="pandas")
    """
    forwarded = {"engine": engine, **kwds}
    return self._call_impl(**forwarded)
class DataObject:
    """
    Main data object that provides access to all datasets as attributes.

    Attribute access for a known dataset name returns a ``DatasetAccessor``
    bound to the current default engine. Accessors are cached per name until
    the default engine changes.

    Examples
    --------
    >>> from altair.datasets import data
    >>>
    >>> # Access datasets as attributes with autocompletion
    >>> cars_df = data.cars()
    >>> movies_df = data.movies(engine="pandas")
    >>>
    >>> # Get URLs
    >>> cars_url = data.cars.url
    >>>
    >>> # Set default engine for all datasets
    >>> data.set_default_engine("polars")
    >>> penguins_df = data.penguins()  # Uses polars engine
    >>>
    >>> # List available datasets
    >>> available_datasets = data.list_datasets()
    """

    def __init__(self, backend: _Backend = "pandas") -> None:
        self._backend: _Backend = backend
        self._accessors: dict[Dataset, DatasetAccessor] = {}
        # Filled lazily by ``_get_dataset_names``.
        self._dataset_names: list[Dataset | LiteralString] | None = None

    def _get_dataset_names(self) -> list[Dataset | LiteralString]:
        """Get the list of available dataset names from metadata."""
        if self._dataset_names is None:
            try:
                from altair.datasets._cache import CsvCache

                cache = CsvCache()
                self._dataset_names = list(cache.mapping.keys())
            except Exception:
                # Deliberate best-effort: fall back to an empty listing if
                # the metadata is not available.
                self._dataset_names = []
        return self._dataset_names

    def __dir__(self) -> list[str]:
        """Return list of available attributes for autocompletion."""
        return [*super().__dir__(), *self._get_dataset_names()]

    def __getattr__(self, name: Dataset) -> DatasetAccessor:  # type: ignore[misc]
        """
        Resolve ``data.<name>`` to a (cached) ``DatasetAccessor``.

        Raises
        ------
        AttributeError
            If ``name`` is private, or is not a known dataset.
        """
        # ``__getattr__`` only runs when normal lookup failed. Private names
        # are never resolved as datasets here. Rejecting them up front also
        # stops unbounded recursion when an instance attribute is probed
        # before ``__init__`` has populated the instance.
        if name.startswith("_"):
            msg = f"{type(self).__name__!r} object has no attribute {name!r}"
            raise AttributeError(msg)
        # Re-use the accessor built earlier for this name. Previously the
        # cache was written but never read, so every access built a new one.
        cached = self._accessors.get(name)
        if cached is not None:
            return cached
        dataset_names = self._get_dataset_names()
        if name not in dataset_names:
            available_datasets = dataset_names[:10]
            error_msg = (
                f"Dataset '{name}' not found. Available datasets: {available_datasets}"
            )
            raise AttributeError(error_msg)
        accessor = DatasetAccessor(name, self._backend)
        self._accessors[name] = accessor
        return accessor

    def set_default_engine(self, engine: _Backend) -> None:
        """
        Set the default engine for all datasets.

        Parameters
        ----------
        engine : {"polars", "pandas", "pandas[pyarrow]", "pyarrow"}
            The backend to use as default for all datasets.

        Examples
        --------
        >>> from altair.datasets import data
        >>> data.set_default_engine("polars")
        >>> cars_df = data.cars()  # Uses polars
        """
        self._backend = engine
        # Clear cached accessors so they use the new default
        self._accessors.clear()

    def list_datasets(self) -> list[Dataset | LiteralString]:
        """
        Get a list of all available dataset names.

        Returns
        -------
        list[str]
            A new list on every call, so callers cannot mutate the
            internally cached names.

        Examples
        --------
        >>> from altair.datasets import data
        >>> datasets = data.list_datasets()
        """
        return list(self._get_dataset_names())

    def get_default_engine(self) -> _Backend:
        """
        Get the current default engine.

        Returns
        -------
        str
            The current default engine.

        Examples
        --------
        >>> from altair.datasets import data
        >>> data.set_default_engine("pandas")
        >>> print(data.get_default_engine())
        pandas
        """
        return self._backend

    def __repr__(self) -> str:
        dataset_count = len(self._get_dataset_names())
        return f"AltairDataObject(default_engine='{self._backend}', datasets={dataset_count})"
= "pyarrow", "polars" what = f"{meta['suffix']!r}" else: install = ("polars",) if meta["is_spatial"]: what = "Geospatial data" elif meta["is_json"]: what = "Non-tabular json" else: what = f"{meta['file_name']!r}" reason = _why(what, backend_name) return cls(f"{_failed_tabular(meta)}{reason}{_suggest_url(meta, *install)}") else: return cls(_implementation_not_found(meta)) @classmethod def from_priority(cls, priority: Sequence[_Backend], /) -> AltairDatasetsError: msg = f"Found no supported backend, searched:\n{priority!r}" return cls(msg) def module_not_found( backend_name: str, reqs: Sequence[str], missing: str ) -> ModuleNotFoundError: if len(reqs) == 1: depends = f"{reqs[0]!r} package" else: depends = ", ".join(f"{req!r}" for req in reqs) + " packages" msg = ( f"Backend {backend_name!r} requires the {depends}, but {missing!r} could not be found.\n" f"This can be installed with pip using:\n" f" pip install {missing}\n" f"Or with conda using:\n" f" conda install -c conda-forge {missing}" ) return ModuleNotFoundError(msg, name=missing) def _failed_url(meta: Metadata, /) -> str: return f"Unable to load {meta['file_name']!r} via url.\n" def _failed_tabular(meta: Metadata, /) -> str: return f"Unable to load {meta['file_name']!r} as tabular data.\n" def _why(what: str, backend_name: str, /) -> str: return f"{what} is not supported natively by {backend_name!r}." 
def _suggest_url(meta: Metadata, *install_other: str) -> str:
    """
    Return a hint that points the user at the dataset's url accessor.

    Parameters
    ----------
    meta
        Metadata of the dataset; only ``"dataset_name"`` is read.
    *install_other
        Optional package names. When given, the hint first suggests
        installing one of them.
    """
    install_hint = ""
    if install_other:
        packages = " or ".join(f"`{pkg}`" for pkg in install_other)
        install_hint = f" installing {packages}, or use"
    pieces = (
        f"\n\nInstead, try{install_hint}:\n",
        " from altair.datasets import data\n",
        f" data.{meta['dataset_name']}.url",
    )
    return "".join(pieces)
_vega-datasets: https://github.com/vega/vega-datasets """ _reader: Reader[IntoDataFrameT, IntoLazyFrameT] @overload @classmethod def from_backend( cls, backend_name: Literal["polars"] = ..., / ) -> Loader[pl.DataFrame, pl.LazyFrame]: ... @overload @classmethod def from_backend( cls, backend_name: Literal["pandas", "pandas[pyarrow]"], / ) -> Loader[pd.DataFrame]: ... @overload @classmethod def from_backend(cls, backend_name: Literal["pyarrow"], /) -> Loader[pa.Table]: ... @classmethod def from_backend( cls: type[Loader[Any, Any]], backend_name: _Backend = "polars", / ) -> Loader[Any, Any]: """ Initialize a new loader, with the specified backend. Parameters ---------- backend_name DataFrame package/config used to return data. * *polars*: Using `polars defaults`_ * *pandas*: Using `pandas defaults`_. * *pandas[pyarrow]*: Using ``dtype_backend="pyarrow"`` * *pyarrow*: (*Experimental*) .. warning:: Most datasets use a `JSON format not supported`_ by ``pyarrow`` Examples -------- Using ``polars``:: from altair.datasets import Loader load = Loader.from_backend("polars") cars = load("cars") type(cars) polars.dataframe.frame.DataFrame Using ``pandas``:: load = Loader.from_backend("pandas") cars = load("cars") type(cars) pandas.core.frame.DataFrame Using ``pandas``, backed by ``pyarrow`` dtypes:: load = Loader.from_backend("pandas[pyarrow]") co2 = load("co2") type(co2) pandas.core.frame.DataFrame co2.dtypes Date datetime64[ns] CO2 double[pyarrow] adjusted CO2 double[pyarrow] dtype: object .. _polars defaults: https://docs.pola.rs/api/python/stable/reference/io.html .. _pandas defaults: https://pandas.pydata.org/docs/reference/io.html .. 
@classmethod
def from_reader(cls, reader: Reader[IntoDataFrameT, IntoLazyFrameT], /) -> Self:
    """
    Wrap an already-constructed reader in a new loader.

    The instance is created with ``cls.__new__`` -- no ``__init__`` is run --
    and ``reader`` is stored as its ``_reader`` attribute.
    """
    loader = cls.__new__(cls)
    setattr(loader, "_reader", reader)
    return loader
def url(
    self,
    name: Dataset | LiteralString,
    suffix: Extension | None = None,
    /,
) -> str:
    """
    Return the address of a remote dataset.

    Delegates to the underlying reader's ``url`` method.

    Parameters
    ----------
    name
        Name of the dataset/`Path.stem`_.
    suffix
        File extension/`Path.suffix`_.

        .. note::
            Only needed if ``name`` is available in multiple formats.

    Examples
    --------
    The result can be passed directly to a chart::

        import altair as alt
        from altair.datasets import Loader

        load = Loader.from_backend("polars")
        url = load.url("cars")
        alt.Chart(url).mark_point().encode(x="Horsepower:Q", y="Miles_per_Gallon:Q")

    .. _Path.stem:
        https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
    .. _Path.suffix:
        https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix
    """
    reader = self._reader
    address = reader.url(name, suffix)
    return address
@final
class _Load(Loader[IntoDataFrameT, IntoLazyFrameT]):
    """
    Loader whose call additionally accepts a per-call ``backend`` override.

    Without ``backend`` the call behaves like the parent ``Loader``. With it,
    a loader is created for that backend and used for this call only.
    """

    @overload
    def __call__(  # pyright: ignore[reportOverlappingOverload]
        self,
        name: Dataset | LiteralString,
        suffix: Extension | None = ...,
        /,
        backend: None = ...,
        **kwds: Any,
    ) -> IntoDataFrameT: ...

    @overload
    def __call__(
        self,
        name: Dataset | LiteralString,
        suffix: Extension | None = ...,
        /,
        backend: Literal["polars"] = ...,
        **kwds: Any,
    ) -> pl.DataFrame: ...

    @overload
    def __call__(
        self,
        name: Dataset | LiteralString,
        suffix: Extension | None = ...,
        /,
        backend: Literal["pandas", "pandas[pyarrow]"] = ...,
        **kwds: Any,
    ) -> pd.DataFrame: ...

    @overload
    def __call__(
        self,
        name: Dataset | LiteralString,
        suffix: Extension | None = ...,
        /,
        backend: Literal["pyarrow"] = ...,
        **kwds: Any,
    ) -> pa.Table: ...

    def __call__(
        self,
        name: Dataset | LiteralString,
        suffix: Extension | None = None,
        /,
        backend: _Backend | None = None,
        **kwds: Any,
    ) -> IntoDataFrameT | pl.DataFrame | pd.DataFrame | pa.Table:
        # An explicit backend wins over the reader this instance wraps.
        if backend is not None:
            one_off = self.from_backend(backend)
            return one_off(name, suffix, **kwds)
        return super().__call__(name, suffix, **kwds)
Notes ----- Extending would be more ergonomic if `read`, `scan`, `_constraints` were available under a single export:: from altair.datasets import ext, reader import polars as pl impls = ( ext.read(pl.read_parquet, ext.is_parquet), ext.read(pl.read_csv, ext.is_csv), ext.read(pl.read_json, ext.is_json), ) user_reader = reader(impls) user_reader.dataset("airports") """ from __future__ import annotations from collections import Counter from collections.abc import Mapping from importlib import import_module from importlib.util import find_spec from itertools import chain from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, overload from urllib.request import build_opener as _build_opener from narwhals.stable import v1 as nw from packaging.requirements import Requirement from altair.datasets import _readimpl from altair.datasets._cache import CsvCache, DatasetCache, SchemaCache, _iter_metadata from altair.datasets._constraints import is_parquet from altair.datasets._exceptions import AltairDatasetsError, module_not_found from altair.datasets._readimpl import IntoDataFrameT, IntoLazyFrameT, is_available if TYPE_CHECKING: import sys from collections.abc import Callable, Sequence from urllib.request import OpenerDirector import pandas as pd import polars as pl import pyarrow as pa from narwhals.stable.v1.typing import IntoExpr from altair.datasets._readimpl import BaseImpl, R, Read, Scan from altair.datasets._typing import Dataset, Extension, Metadata from altair.vegalite.v6.schema._typing import OneOrSeq if sys.version_info >= (3, 13): from typing import TypeIs, TypeVar else: from typing_extensions import TypeIs, TypeVar if sys.version_info >= (3, 12): from typing import Unpack else: from typing_extensions import Unpack if sys.version_info >= (3, 11): from typing import LiteralString else: from typing_extensions import LiteralString from typing import TypeAlias _Polars: TypeAlias = Literal["polars"] _Pandas: TypeAlias = 
Literal["pandas"] _PyArrow: TypeAlias = Literal["pyarrow"] _PandasAny: TypeAlias = Literal[_Pandas, "pandas[pyarrow]"] _Backend: TypeAlias = Literal[_Polars, _PandasAny, _PyArrow] _CuDF: TypeAlias = Literal["cudf"] _Dask: TypeAlias = Literal["dask"] _DuckDB: TypeAlias = Literal["duckdb"] _Ibis: TypeAlias = Literal["ibis"] _PySpark: TypeAlias = Literal["pyspark"] _NwSupport: TypeAlias = Literal[ _Polars, _Pandas, _PyArrow, _CuDF, _Dask, _DuckDB, _Ibis, _PySpark ] _NwSupportT = TypeVar( "_NwSupportT", _Polars, _Pandas, _PyArrow, _CuDF, _Dask, _DuckDB, _Ibis, _PySpark, ) _EagerAllowedImpl: TypeAlias = Literal[ nw.Implementation.PANDAS, nw.Implementation.POLARS, nw.Implementation.PYARROW, ] _EagerAllowed: TypeAlias = Literal[_Pandas, _Polars, _PyArrow] _SupportProfile: TypeAlias = Mapping[ Literal["supported", "unsupported"], "Sequence[Dataset]" ] """ Dataset support varies between backends and available dependencies. Any name listed in ``"unsupported"`` will raise an error on:: from altair.datasets import load load("7zip") Instead, they can be loaded via:: import altair as alt from altair.datasets import url alt.Chart(url("7zip")) """ class Reader(Generic[IntoDataFrameT, IntoLazyFrameT]): """ Modular file reader, targeting remote & local tabular resources. .. warning:: Use ``reader(...)`` instead of instantiating ``Reader`` directly. """ _read: Sequence[Read[IntoDataFrameT]] """Eager file read functions.""" _scan: Sequence[Scan[IntoLazyFrameT]] """Lazy file read functions.""" _name: str """ Used in error messages, repr and matching ``@overload``(s). Otherwise, has no concrete meaning. """ _implementation: _EagerAllowedImpl """ Corresponding `narwhals implementation`_. .. 
def scan_fn(self, meta: Metadata | Path | str, /) -> Callable[..., IntoLazyFrameT]:
    """
    Return the lazy read function matching ``meta``.

    Parameters
    ----------
    meta
        Either a metadata mapping, which is used as-is, or a path/string,
        which is reduced to a ``{"suffix": ...}`` mapping first.
    """
    if not isinstance(meta, Mapping):
        meta = {"suffix": _into_suffix(meta)}
    return self._solve(meta, self._scan)
def dataset(
    self,
    name: Dataset | LiteralString,
    suffix: Extension | None = None,
    /,
    **kwds: Any,
) -> IntoDataFrameT:
    """
    Resolve ``name`` to a single metadata record and read it.

    The source is the cached file when the cache is active, otherwise a
    stream opened on the record's url. Any exception raised by the read
    function is passed to ``_handle_pyarrow_date_error`` before it is
    allowed to propagate.

    Parameters
    ----------
    name
        Name of the dataset.
    suffix
        File extension, used to disambiguate between formats.
    **kwds
        User arguments, merged with schema defaults via ``_merge_kwds``.
    """
    frame = self._query(name, suffix)
    meta = next(_iter_metadata(frame))
    read = self.read_fn(meta)
    options = self._merge_kwds(meta, kwds)

    def _load(source: Any) -> IntoDataFrameT:
        # Shared by both sources, so the error translation lives in one place.
        try:
            return read(source, **options)
        except Exception as err:
            self._handle_pyarrow_date_error(err, name)
            raise

    if not self.cache.is_active():
        with self._opener.open(meta["url"]) as stream:
            return _load(stream)
    return _load(self.cache._maybe_download(meta))
_licenses: https://datapackage.org/standard/data-package/#licenses """ import webbrowser from altair.utils import VERSIONS ref = self._query(name).get_column("file_name").item(0).replace(".", "") tag = VERSIONS["vega-datasets"] url = f"https://github.com/vega/vega-datasets/blob/v{tag}/datapackage.md#{ref}" webbrowser.open(url) @overload def profile(self, *, show: Literal[False] = ...) -> _SupportProfile: ... @overload def profile(self, *, show: Literal[True]) -> None: ... def profile(self, *, show: bool = False) -> _SupportProfile | None: """ Describe which datasets can be loaded as tabular data. Parameters ---------- show Print a densely formatted repr *instead of* returning a mapping. """ relevant_columns = set( chain.from_iterable(impl._relevant_columns for impl in self._read) ) frame = self._scan_metadata().select("dataset_name", *relevant_columns) inc_expr = nw.any_horizontal(impl._include_expr for impl in self._read) result: _SupportProfile = { "unsupported": _dataset_names(frame, ~inc_expr), "supported": _dataset_names(frame, inc_expr), } if show: import pprint pprint.pprint(result, compact=True, sort_dicts=False) return None return result def _query( self, name: Dataset | LiteralString, suffix: Extension | None = None, / ) -> nw.DataFrame[IntoDataFrameT]: """ Query a tabular version of `vega-datasets/datapackage.json`_. Applies a filter, erroring out when no results would be returned. .. _vega-datasets/datapackage.json: https://github.com/vega/vega-datasets/blob/main/datapackage.json """ constraints = _into_constraints(name, suffix) frame = self._scan_metadata(**constraints).collect() if not frame.is_empty(): return frame else: msg = f"Found no results for:\n {constraints!r}" raise ValueError(msg) def _merge_kwds(self, meta: Metadata, kwds: dict[str, Any], /) -> Mapping[str, Any]: """ Extend user-provided arguments with dataset & library-specfic defaults. .. important:: User-provided arguments have a higher precedence. 
""" if self._schema_cache.is_active() and ( schema := self._schema_cache.schema_kwds(meta) ): kwds = schema | kwds if kwds else schema return kwds @property def _metadata_frame(self) -> nw.LazyFrame[IntoLazyFrameT]: fp = self._metadata_path return nw.from_native(self.scan_fn(fp)(fp)).lazy() def _scan_metadata( self, *predicates: OneOrSeq[IntoExpr], **constraints: Unpack[Metadata] ) -> nw.LazyFrame[IntoLazyFrameT]: if predicates or constraints: return self._metadata_frame.filter(*predicates, **constraints) return self._metadata_frame def _solve( self, meta: Metadata, impls: Sequence[BaseImpl[R]], / ) -> Callable[..., R]: """ Return the first function that satisfies dataset constraints. See Also -------- ``altair.datasets._readimpl.BaseImpl.unwrap_or_skip`` """ items = meta.items() it = (some for impl in impls if (some := impl.unwrap_or_skip(items))) if fn_or_err := next(it, None): if _is_err(fn_or_err): raise fn_or_err.from_tabular(meta, self._name) return fn_or_err raise AltairDatasetsError.from_tabular(meta, self._name) def _dataset_names( frame: nw.LazyFrame, *predicates: OneOrSeq[IntoExpr] ) -> Sequence[Dataset]: # NOTE: helper function for `Reader.profile` return ( frame.filter(*predicates) .select("dataset_name") .collect() .get_column("dataset_name") .to_list() ) class _NoParquetReader(Reader[IntoDataFrameT]): def __repr__(self) -> str: return f"{super().__repr__()}\ncsv_cache\n {self.csv_cache!r}" @property def csv_cache(self) -> CsvCache: if not hasattr(self, "_csv_cache"): self._csv_cache = CsvCache() return self._csv_cache @property def _metadata_frame(self) -> nw.LazyFrame[Any]: data = self.csv_cache.rotated impl = self._implementation return nw.maybe_convert_dtypes(nw.from_dict(data, backend=impl)).lazy() @overload def reader( read_fns: Sequence[Read[IntoDataFrameT]], scan_fns: tuple[()] = ..., *, name: str | None = ..., implementation: nw.Implementation = ..., ) -> Reader[IntoDataFrameT]: ... 
@overload
def reader(
    read_fns: Sequence[Read[IntoDataFrameT]],
    scan_fns: Sequence[Scan[IntoLazyFrameT]],
    *,
    name: str | None = ...,
    implementation: nw.Implementation = ...,
) -> Reader[IntoDataFrameT, IntoLazyFrameT]: ...


def reader(
    read_fns: Sequence[Read[IntoDataFrameT]],
    scan_fns: Sequence[Scan[IntoLazyFrameT]] = (),
    *,
    name: str | None = None,
    implementation: nw.Implementation = nw.Implementation.UNKNOWN,
) -> Reader[IntoDataFrameT, IntoLazyFrameT] | Reader[IntoDataFrameT]:
    """
    Assemble a ``Reader`` from eager (``read_fns``) and lazy (``scan_fns``) functions.

    Parameters
    ----------
    read_fns
        Eager read implementations.
    scan_fns
        Lazy scan implementations, may be empty.
    name
        Label for the reader.
        When falsy, the most common ``_inferred_package`` among ``read_fns`` is used.
    implementation
        Backend implementation.
        When it is not eager-allowed, it is resolved from ``Requirement(name)``.

    Notes
    -----
    Without ``scan_fns``, a scan function is derived via ``_steal_eager_parquet``.
    If that yields nothing, a ``_NoParquetReader`` is returned instead.
    """
    if not name:
        # Majority vote over the packages the read functions appear to come from.
        tally = Counter(impl._inferred_package for impl in read_fns)
        name = tally.most_common(1)[0][0]
    if not _is_eager_allowed(implementation):
        implementation = _into_implementation(Requirement(name))
    if scan_fns:
        return Reader(read_fns, scan_fns, name, implementation)
    stolen = _steal_eager_parquet(read_fns)
    if stolen:
        return Reader(read_fns, stolen, name, implementation)
    return _NoParquetReader[IntoDataFrameT](read_fns, (), name, implementation)


def infer_backend(
    *, priority: Sequence[_Backend] = ("polars", "pandas[pyarrow]", "pandas", "pyarrow")
) -> Reader[Any, Any]:
    """
    Return a reader for the first backend in ``priority`` whose requirements are importable.

    Raises the error built by ``AltairDatasetsError.from_priority`` when no reader is found.

    Notes
    -----
    - ``"polars"``: can natively load every dataset (including ``(Geo|Topo)JSON``)
    - ``"pandas[pyarrow]"``: can load *most* datasets, guarantees ``.parquet`` support
    - ``"pandas"``: supports ``.parquet``, if `fastparquet`_ is installed
    - ``"pyarrow"``: least reliable

    .. _fastparquet:
        https://github.com/dask/fastparquet
    """
    available = (nm for nm in priority if is_available(_requirements(nm)))
    first = next(available, None)
    # Only the first available backend is ever tried, later ones are not consulted.
    if first is not None and (found := _from_backend(first)):
        return found
    raise AltairDatasetsError.from_priority(priority)


@overload
def _from_backend(name: _Polars, /) -> Reader[pl.DataFrame, pl.LazyFrame]: ...
@overload
def _from_backend(name: _PandasAny, /) -> Reader[pd.DataFrame]: ...
@overload
def _from_backend(name: _PyArrow, /) -> Reader[pa.Table]: ...
# FIXME: The order this is defined in makes splitting the module complicated
# - Can't use a classmethod, since some result in a subclass used
def _from_backend(name: _Backend, /) -> Reader[Any, Any]:
    """
    Reader initialization dispatcher.

    Validates ``name``, resolves its implementation, then builds the reader
    from the matching ``_readimpl`` function set.

    Raises
    ------
    TypeError
        If ``name`` is not a known backend.

    FIXME: Works, but defining these in mixed shape functions seems off.
    """
    if not _is_backend(name):
        msg = f"Unknown backend {name!r}"
        raise TypeError(msg)
    implementation = _into_implementation(name)
    read_fns: Sequence[Read[Any]]
    # Only ``polars`` provides its own scan functions; others use the empty default.
    scan_fns: Sequence[Scan[Any]] = ()
    if name == "polars":
        read_fns, scan_fns = _readimpl.pl_only()
    elif name == "pandas[pyarrow]":
        read_fns = _readimpl.pd_pyarrow()
    elif name == "pandas":
        read_fns = _readimpl.pd_only()
    else:
        # ``_is_backend`` leaves "pyarrow" as the only remaining option.
        read_fns = _readimpl.pa_any()
    return reader(read_fns, scan_fns, name=name, implementation=implementation)


def _is_backend(obj: Any) -> TypeIs[_Backend]:
    """Return ``True`` when ``obj`` is one of the supported backend names."""
    known = {"polars", "pandas", "pandas[pyarrow]", "pyarrow"}
    return obj in known


def _is_err(obj: Any) -> TypeIs[type[AltairDatasetsError]]:
    """Return ``True`` only when ``obj`` is the ``AltairDatasetsError`` class itself."""
    return obj is AltairDatasetsError


def _into_constraints(
    name: Dataset | LiteralString, suffix: Extension | None, /
) -> Metadata:
    """
    Transform args into a mapping to column names.

    - A ``name`` containing ``"."`` is treated as a file name (``suffix`` is not used).
    - Otherwise ``name`` is a dataset name, optionally paired with a ``suffix``
      that starts with ``"."``.

    Raises
    ------
    TypeError
        If ``suffix`` is given but does not start with ``"."``.
    """
    if "." in name:
        return {"file_name": name}
    if suffix is None:
        return {"dataset_name": name}
    if suffix.startswith("."):
        return {"dataset_name": name, "suffix": suffix}

    # Deferred imports: only needed to describe the valid options in the error.
    from typing import get_args

    from altair.datasets._typing import Extension

    msg = (
        f"Expected 'suffix' to be one of {get_args(Extension)!r},\n"
        f"but got: {suffix!r}"
    )
    raise TypeError(msg)


def _is_eager_allowed(impl: nw.Implementation, /) -> TypeIs[_EagerAllowedImpl]:
    """Return ``True`` when ``impl`` is the pandas, polars or pyarrow implementation."""
    eager_impls = {
        nw.Implementation.PANDAS,
        nw.Implementation.POLARS,
        nw.Implementation.PYARROW,
    }
    return impl in eager_impls


def _into_implementation(
    backend: _NwSupport | _PandasAny | nw.Implementation | Requirement, /
) -> _EagerAllowedImpl:
    """
    Resolve ``backend`` to an eager-allowed ``narwhals`` implementation.

    The backend's packages are imported first (see ``_import_guarded``).

    Raises
    ------
    ValueError
        If ``narwhals`` reports the primary package as ``UNKNOWN``.
    NotImplementedError
        If the implementation is known, but not eager-allowed.
    """
    if isinstance(backend, nw.Implementation):
        req = Requirement(str(backend))
    else:
        req = backend
    primary = _import_guarded(req)
    impl = nw.Implementation.from_backend(primary)
    if _is_eager_allowed(impl):
        return impl
    if impl is nw.Implementation.UNKNOWN:
        msg = f"Package {primary!r} is not supported by `narwhals`."
        raise ValueError(msg)
    raise NotImplementedError(impl)


def _into_suffix(obj: Path | str, /) -> Any:
    """
    Return ``obj.suffix`` for a ``Path``, or ``obj`` unchanged for a ``str``.

    Raises
    ------
    TypeError
        For any other type.
    """
    if isinstance(obj, Path):
        return obj.suffix
    if isinstance(obj, str):
        return obj
    msg = f"Unexpected type {type(obj).__name__!r}"
    raise TypeError(msg)


def _steal_eager_parquet(
    read_fns: Sequence[Read[IntoDataFrameT]], /
) -> Sequence[Scan[Any]] | None:
    """
    Derive a scan function from the first read function usable for parquet.

    A read function qualifies when its ``include`` constraint compares ``<=`` to
    ``is_parquet``; it is then wrapped by ``_readimpl.into_scan``.
    Returns ``None`` when no read function qualifies.
    """
    candidates = (rd for rd in read_fns if rd.include <= is_parquet)
    first = next(candidates, None)
    return (_readimpl.into_scan(first),) if first else None


@overload
def _import_guarded(req: _PandasAny, /) -> _Pandas: ...
@overload
def _import_guarded(req: _NwSupportT, /) -> _NwSupportT: ...
@overload
def _import_guarded(req: Requirement, /) -> LiteralString: ...
def _import_guarded(req: Any, /) -> LiteralString: requires = _requirements(req) for name in requires: if spec := find_spec(name): import_module(spec.name) else: raise module_not_found(str(req), requires, missing=name) return requires[0] def _requirements(req: Requirement | str, /) -> tuple[Any, ...]: req = Requirement(req) if isinstance(req, str) else req return (req.name, *req.extras) ================================================ FILE: altair/datasets/_readimpl.py ================================================ """Individual read functions and siuations they support.""" from __future__ import annotations import sys from enum import Enum from functools import partial, wraps from importlib.util import find_spec from itertools import chain from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any, Generic, Literal from narwhals.stable import v1 as nw from narwhals.stable.v1.dependencies import get_pandas, get_polars from altair.datasets._constraints import ( is_arrow, is_csv, is_json, is_meta, is_not_tabular, is_parquet, is_spatial, is_topo, is_tsv, ) from altair.datasets._exceptions import AltairDatasetsError if sys.version_info >= (3, 13): from typing import TypeVar else: from typing_extensions import TypeVar if sys.version_info >= (3, 12): from typing import TypeAliasType else: from typing_extensions import TypeAliasType if TYPE_CHECKING: from collections.abc import Callable, Iterable, Iterator, Sequence from io import IOBase from types import ModuleType import pandas as pd import polars as pl import pyarrow as pa from narwhals.stable.v1 import typing as nwt from altair.datasets._constraints import Items, MetaIs __all__ = ["is_available", "pa_any", "pd_only", "pd_pyarrow", "pl_only", "read", "scan"] R = TypeVar( "R", bound="nwt.IntoDataFrame | nwt.IntoLazyFrame", covariant=True, ) IntoDataFrameT = TypeVar("IntoDataFrameT", bound="nwt.IntoDataFrame") IntoLazyFrameT = TypeVar( "IntoLazyFrameT", bound="nwt.IntoLazyFrame", 
default=Any, ) Read = TypeAliasType("Read", "BaseImpl[IntoDataFrameT]", type_params=(IntoDataFrameT,)) """An *eager* file read function.""" Scan = TypeAliasType("Scan", "BaseImpl[IntoLazyFrameT]", type_params=(IntoLazyFrameT,)) """A *lazy* file read function.""" class Skip(Enum): """Falsy sentinel.""" skip = 0 def __bool__(self) -> Literal[False]: return False def __repr__(self) -> Literal[""]: return "" class BaseImpl(Generic[R]): """ A function wrapped with dataset support constraints. The ``include``, ``exclude`` properties form a `NIMPLY gate`_ (`Material nonimplication`_). Examples -------- For some dataset ``D``, we can use ``fn`` if:: impl: BaseImpl impl.include(D) and not impl.exclude(D) .. _NIMPLY gate: https://en.m.wikipedia.org/wiki/NIMPLY_gate .. _Material nonimplication: https://en.m.wikipedia.org/wiki/Material_nonimplication#Truth_table """ fn: Callable[..., R] """Wrapped read/scan function.""" include: MetaIs """Constraint indicating ``fn`` **supports** reading a dataset.""" exclude: MetaIs """Constraint *subsetting* ``include`` to mark **non-support**.""" def __init__( self, fn: Callable[..., R], include: MetaIs, exclude: MetaIs | None, kwds: dict[str, Any], /, ) -> None: exclude = exclude or self._exclude_none() if not include.isdisjoint(exclude): intersection = ", ".join(f"{k}={v!r}" for k, v in include & exclude) msg = f"Constraints overlap at: `{intersection}`\ninclude={include!r}\nexclude={exclude!r}" raise TypeError(msg) object.__setattr__(self, "fn", partial(fn, **kwds) if kwds else fn) object.__setattr__(self, "include", include) object.__setattr__(self, "exclude", exclude) def unwrap_or_skip( self, meta: Items, / ) -> Callable[..., R] | type[AltairDatasetsError] | Skip: """ Indicate an action to take for a dataset. 
**Supports** dataset, use this function:: Callable[..., R] Has explicitly marked as **not supported**:: type[AltairDatasetsError] No relevant constraints overlap, safe to check others:: Skip """ if self.include.issubset(meta): return self.fn if self.exclude.isdisjoint(meta) else AltairDatasetsError return Skip.skip @classmethod def _exclude_none(cls) -> MetaIs: """Represents the empty set.""" return is_meta() def __setattr__(self, name: str, value: Any): msg = ( f"{type(self).__name__!r} is immutable.\n" f"Could not assign self.{name} = {value}" ) raise TypeError(msg) @property def _inferred_package(self) -> str: return _root_package_name(_unwrap_partial(self.fn), "UNKNOWN") def __repr__(self) -> str: tp_name = f"{type(self).__name__}[{self._inferred_package}?]" return f"{tp_name}({self})" def __str__(self) -> str: if isinstance(self.fn, partial): fn = _unwrap_partial(self.fn) kwds = self.fn.keywords.items() fn_repr = f"{fn.__name__}(..., {', '.join(f'{k}={v!r}' for k, v in kwds)})" else: fn_repr = f"{self.fn.__name__}(...)" inc, exc = self.include, self.exclude return f"{fn_repr}, {f'include={inc!r}, exclude={exc!r}' if exc else repr(inc)}" @property def _relevant_columns(self) -> Iterator[str]: name = itemgetter(0) yield from (name(obj) for obj in chain(self.include, self.exclude)) @property def _include_expr(self) -> nw.Expr: return ( self.include.to_expr() & ~self.exclude.to_expr() if self.exclude else self.include.to_expr() ) @property def _exclude_expr(self) -> nw.Expr: if self.exclude: return self.include.to_expr() & self.exclude.to_expr() msg = f"Unable to generate an exclude expression without setting exclude\n\n{self!r}" raise TypeError(msg) def read( fn: Callable[..., IntoDataFrameT], /, include: MetaIs, exclude: MetaIs | None = None, **kwds: Any, ) -> Read[IntoDataFrameT]: return BaseImpl(fn, include, exclude, kwds) def scan( fn: Callable[..., IntoLazyFrameT], /, include: MetaIs, exclude: MetaIs | None = None, **kwds: Any, ) -> Scan[IntoLazyFrameT]: 
return BaseImpl(fn, include, exclude, kwds) def into_scan(impl: Read[IntoDataFrameT], /) -> Scan[Any]: def scan_fn(fn: Callable[..., IntoDataFrameT], /) -> Callable[..., Any]: @wraps(_unwrap_partial(fn)) def wrapper(*args: Any, **kwds: Any) -> nw.LazyFrame[Any]: return nw.from_native(fn(*args, **kwds)).lazy() return wrapper return scan(scan_fn(impl.fn), impl.include, impl.exclude) def is_available( pkg_names: str | Iterable[str], *more_pkg_names: str, require_all: bool = True ) -> bool: """ Check for importable package(s), without raising on failure. Parameters ---------- pkg_names, more_pkg_names One or more packages. require_all * ``True`` every package. * ``False`` at least one package. """ if not more_pkg_names and isinstance(pkg_names, str): return find_spec(pkg_names) is not None pkgs_names = pkg_names if not isinstance(pkg_names, str) else (pkg_names,) names = chain(pkgs_names, more_pkg_names) fn = all if require_all else any return fn(find_spec(name) is not None for name in names) def _root_package_name(obj: Any, default: str, /) -> str: # NOTE: Defers importing `inspect`, if we can get the module name if hasattr(obj, "__module__"): return obj.__module__.split(".")[0] else: from inspect import getmodule module = getmodule(obj) if module and (pkg := module.__package__): return pkg.split(".")[0] return default def _unwrap_partial(fn: Any, /) -> Any: # NOTE: ``functools._unwrap_partial`` func = fn while isinstance(func, partial): func = func.func return func def pl_only() -> tuple[Sequence[Read[pl.DataFrame]], Sequence[Scan[pl.LazyFrame]]]: # pyright: ignore[reportInvalidTypeForm] import polars as pl pl_read_json = read(_pl_read_json_roundtrip(get_polars()), is_json) if is_available("polars_st"): fn_json: Sequence[Read[pl.DataFrame]] = ( _pl_read_json_polars_st_topo_impl(), # TopoJSON files first _pl_read_json_polars_st_impl(), # Then other spatial JSON pl_read_json, ) else: fn_json = (pl_read_json,) read_fns = ( read(pl.read_csv, is_csv, 
try_parse_dates=True), *fn_json, read(pl.read_csv, is_tsv, separator="\t", try_parse_dates=True), read(pl.read_ipc, is_arrow), read(pl.read_parquet, is_parquet), ) scan_fns = (scan(pl.scan_parquet, is_parquet),) return read_fns, scan_fns def pd_only() -> Sequence[Read[pd.DataFrame]]: import pandas as pd opt: Sequence[Read[pd.DataFrame]] if is_available("pyarrow"): opt = read(pd.read_feather, is_arrow), read(pd.read_parquet, is_parquet) elif is_available("fastparquet"): opt = (read(pd.read_parquet, is_parquet),) else: opt = () pd_read_json = read(_pd_read_json(get_pandas()), is_json, exclude=is_spatial) if is_available("geopandas"): fn_json: Sequence[Read[pd.DataFrame]] = ( _pd_read_json_geopandas_impl(), pd_read_json, ) else: fn_json = (pd_read_json,) return ( read(pd.read_csv, is_csv), *fn_json, read(pd.read_csv, is_tsv, sep="\t"), *opt, ) def pd_pyarrow() -> Sequence[Read[pd.DataFrame]]: import pandas as pd kwds: dict[str, Any] = {"dtype_backend": "pyarrow"} pd_read_json = read( _pd_read_json(get_pandas()), is_json, exclude=is_spatial, **kwds ) if is_available("geopandas"): fn_json: Sequence[Read[pd.DataFrame]] = ( _pd_read_json_geopandas_impl(), pd_read_json, ) else: fn_json = (pd_read_json,) return ( read(pd.read_csv, is_csv, **kwds), *fn_json, read(pd.read_csv, is_tsv, sep="\t", **kwds), read(pd.read_feather, is_arrow, **kwds), read(pd.read_parquet, is_parquet, **kwds), ) def pa_any() -> Sequence[Read[pa.Table]]: from pyarrow import csv, feather, parquet return ( read(csv.read_csv, is_csv), _pa_read_json_impl(), read(csv.read_csv, is_tsv, parse_options=csv.ParseOptions(delimiter="\t")), read(feather.read_table, is_arrow), read(parquet.read_table, is_parquet), ) def _pa_read_json_impl() -> Read[pa.Table]: """ Mitigating ``pyarrow``'s `line-delimited`_ JSON requirement. .. 
_line-delimited: https://arrow.apache.org/docs/python/json.html#reading-json-files """ if is_available("polars"): polars_ns = get_polars() if polars_ns is not None: return read(_pl_read_json_roundtrip_to_arrow(polars_ns), is_json) if is_available("pandas"): pandas_ns = get_pandas() if pandas_ns is not None: return read(_pd_read_json_to_arrow(pandas_ns), is_json, exclude=is_spatial) return read(_stdlib_read_json_to_arrow, is_json, exclude=is_not_tabular) def _pd_read_json(ns: ModuleType, /) -> Callable[..., pd.DataFrame]: @wraps(ns.read_json) def fn(source: Path | Any, /, **kwds: Any) -> pd.DataFrame: return _pd_fix_dtypes_nw(ns.read_json(source, **kwds), **kwds).to_native() return fn def _pd_read_json_geopandas_impl() -> Read[pd.DataFrame]: import geopandas @wraps(geopandas.read_file) def fn(source: Path | Any, /, schema: Any = None, **kwds: Any) -> pd.DataFrame: return geopandas.read_file(source, **kwds) return read(fn, is_meta(is_spatial=True, suffix=".json")) def _pd_fix_dtypes_nw( df: pd.DataFrame, /, *, dtype_backend: Any = None, **kwds: Any ) -> nw.DataFrame[pd.DataFrame]: kwds = {"dtype_backend": dtype_backend} if dtype_backend else {} return ( df.convert_dtypes(**kwds) .pipe(nw.from_native, eager_only=True) .with_columns(nw.selectors.by_dtype(nw.Object).cast(nw.String)) ) def _pd_read_json_to_arrow(ns: ModuleType, /) -> Callable[..., pa.Table]: @wraps(ns.read_json) def fn(source: Path | Any, /, *, schema: Any = None, **kwds: Any) -> pa.Table: """``schema`` is only here to swallow the ``SchemaCache`` if used.""" return ( ns.read_json(source, **kwds) .pipe(_pd_fix_dtypes_nw, dtype_backend="pyarrow") .to_arrow() ) return fn def _pl_read_json_polars_st_impl() -> Read[pl.DataFrame]: import polars_st as st @wraps(st.read_file) def fn(source: Path | Any, /, schema: Any = None, **kwds: Any) -> pl.DataFrame: return st.read_file(source, **kwds) return read(fn, is_meta(is_spatial=True, suffix=".json")) def _pl_read_json_polars_st_topo_impl() -> Read[pl.DataFrame]: 
import polars_st as st @wraps(st.read_file) def fn(source: Path | Any, /, schema: Any = None, **kwds: Any) -> pl.DataFrame: # Add TopoJSON driver prefix for URLs if isinstance(source, str) and source.startswith("http"): source = f"TopoJSON:{source}" return st.read_file(source, **kwds) return read(fn, is_topo) def _pl_read_json_roundtrip(ns: ModuleType, /) -> Callable[..., pl.DataFrame]: """ Try to utilize better date parsing available in `pl.read_csv`_. `pl.read_json`_ has few options when compared to `pl.read_csv`_. Chaining the two together - *where possible* - is still usually faster than `pandas.read_json`_. .. _pl.read_json: https://docs.pola.rs/api/python/stable/reference/api/polars.read_json.html .. _pl.read_csv: https://docs.pola.rs/api/python/stable/reference/api/polars.read_csv.html .. _pandas.read_json: https://pandas.pydata.org/docs/reference/api/pandas.read_json.html """ from io import BytesIO @wraps(ns.read_json) def fn(source: Path | IOBase, /, **kwds: Any) -> pl.DataFrame: df = ns.read_json(source, **kwds) if any(tp.is_nested() for tp in df.schema.dtypes()): return df buf = BytesIO() df.write_csv(buf) if kwds: SHARED_KWDS = {"schema", "schema_overrides", "infer_schema_length"} kwds = {k: v for k, v in kwds.items() if k in SHARED_KWDS} return ns.read_csv(buf, try_parse_dates=True, **kwds) return fn def _pl_read_json_roundtrip_to_arrow(ns: ModuleType, /) -> Callable[..., pa.Table]: eager = _pl_read_json_roundtrip(ns) @wraps(ns.read_json) def fn(source: Path | IOBase, /, **kwds: Any) -> pa.Table: return eager(source).to_arrow() return fn def _stdlib_read_json(source: Path | Any, /) -> Any: import json if not isinstance(source, Path): return json.load(source) else: with Path(source).open(encoding="utf-8") as f: return json.load(f) def _stdlib_read_json_to_arrow(source: Path | Any, /, **kwds: Any) -> pa.Table: import pyarrow as pa rows: list[dict[str, Any]] = _stdlib_read_json(source) try: return pa.Table.from_pylist(rows, **kwds) except TypeError: 
import csv import io from pyarrow import csv as pa_csv with io.StringIO() as f: writer = csv.DictWriter(f, rows[0].keys(), dialect=csv.unix_dialect) writer.writeheader() writer.writerows(rows) with io.BytesIO(f.getvalue().encode()) as f2: return pa_csv.read_csv(f2) ================================================ FILE: altair/datasets/_typing.py ================================================ # The contents of this file are automatically written by # tools/datasets.__init__.py. Do not modify directly. from __future__ import annotations import sys from typing import Literal, TypeAlias if sys.version_info >= (3, 15): from typing import TypedDict else: from typing_extensions import TypedDict if sys.version_info >= (3, 11): from typing import LiteralString else: from typing_extensions import LiteralString __all__ = ["Dataset", "Extension", "Metadata"] Dataset: TypeAlias = Literal[ "airports", "annual_precip", "anscombe", "barley", "birdstrikes", "budget", "budgets", "burtin", "cars", "co2_concentration", "countries", "crimea", "disasters", "driving", "earthquakes", "ffox", "flare", "flare_dependencies", "flights_10k", "flights_200k_arrow", "flights_200k_json", "flights_20k", "flights_2k", "flights_3m", "flights_5k", "flights_airport", "football", "gapminder", "gapminder_health_income", "gimp", "github", "global_temp", "icon_7zip", "income", "iowa_electricity", "jobs", "la_riots", "london_boroughs", "london_centroids", "london_tube_lines", "lookup_groups", "lookup_people", "miserables", "monarchs", "movies", "normal_2d", "obesity", "ohlc", "penguins", "platformer_terrain", "political_contributions", "population", "population_engineers_hurricanes", "seattle_weather", "seattle_weather_hourly_normals", "sp500", "sp500_2000", "species", "stocks", "udistrict", "unemployment", "unemployment_across_industries", "uniform_2d", "us_10m", "us_employment", "us_state_capitals", "volcano", "weather", "weekly_weather", "wheat", "windvectors", "world_110m", "zipcodes", ] Extension: 
TypeAlias = Literal[".arrow", ".csv", ".json", ".parquet", ".png", ".tsv"] class Metadata(TypedDict, total=False): """ Full schema for ``metadata.parquet``. Parameters ---------- dataset_name Name of the dataset from the resource name field. suffix File extension/`Path.suffix`_. file_name Equivalent to `Path.name`_. bytes File size in *bytes*. is_image Only accessible via url. is_tabular Can be read as tabular data. is_geo `GeoJSON`_ format. is_topo `TopoJSON`_ format. is_spatial Any geospatial format. Only natively supported by ``polars``. is_json Not supported natively by ``pyarrow``. has_schema Data types available for improved ``pandas`` parsing. sha Unique hash for the dataset. .. note:: E.g. if the dataset did *not* change between ``v1.0.0``-``v2.0.0``; then this value would remain stable. url Remote url used to access dataset. .. _Path.stem: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem .. _Path.name: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.name .. _Path.suffix: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffix .. _GeoJSON: https://en.wikipedia.org/wiki/GeoJSON .. _TopoJSON: https://en.wikipedia.org/wiki/GeoJSON#TopoJSON Examples -------- ``Metadata`` keywords form constraints to filter a table like the below sample: ``` shape: (73, 13) ┌────────────────┬────────┬────────────────┬───┬───────────────┬───────────────┐ │ dataset_name ┆ suffix ┆ file_name ┆ … ┆ sha ┆ url │ │ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ ┆ str ┆ str │ ╞════════════════╪════════╪════════════════╪═══╪═══════════════╪═══════════════╡ │ airports ┆ .csv ┆ airports.csv ┆ … ┆ 608ba6d51fa70 ┆ https://cdn.j │ │ ┆ ┆ ┆ ┆ 584c3fa1d31e… ┆ sdelivr.net/… │ │ annual_precip ┆ .json ┆ annual-precip. 
┆ … ┆ 719e73406cfc0 ┆ https://cdn.j │ │ ┆ ┆ json ┆ ┆ 8f16dda65151… ┆ sdelivr.net/… │ │ anscombe ┆ .json ┆ anscombe.json ┆ … ┆ 11ae97090b626 ┆ https://cdn.j │ │ ┆ ┆ ┆ ┆ 3bdf0c866115… ┆ sdelivr.net/… │ │ barley ┆ .json ┆ barley.json ┆ … ┆ 8dc50de2509b6 ┆ https://cdn.j │ │ ┆ ┆ ┆ ┆ e197ce95c24c… ┆ sdelivr.net/… │ │ birdstrikes ┆ .csv ┆ birdstrikes.cs ┆ … ┆ 1b8b190c9bc02 ┆ https://cdn.j │ │ ┆ ┆ v ┆ ┆ ef7bcbfe5a8a… ┆ sdelivr.net/… │ │ … ┆ … ┆ … ┆ … ┆ … ┆ … │ │ weekly_weather ┆ .json ┆ weekly-weather ┆ … ┆ bd42a3e2403e7 ┆ https://cdn.j │ │ ┆ ┆ .json ┆ ┆ ccd6baaa89f9… ┆ sdelivr.net/… │ │ wheat ┆ .json ┆ wheat.json ┆ … ┆ cde46b43fc82f ┆ https://cdn.j │ │ ┆ ┆ ┆ ┆ 4c3c2a37ddcf… ┆ sdelivr.net/… │ │ windvectors ┆ .csv ┆ windvectors.cs ┆ … ┆ ed686b0ba613a ┆ https://cdn.j │ │ ┆ ┆ v ┆ ┆ bd59d09fcd94… ┆ sdelivr.net/… │ │ world_110m ┆ .json ┆ world-110m.jso ┆ … ┆ a1ce852de6f27 ┆ https://cdn.j │ │ ┆ ┆ n ┆ ┆ 13c94c0c2840… ┆ sdelivr.net/… │ │ zipcodes ┆ .csv ┆ zipcodes.csv ┆ … ┆ d3df33e12be0d ┆ https://cdn.j │ │ ┆ ┆ ┆ ┆ 0544c95f1bd4… ┆ sdelivr.net/… │ └────────────────┴────────┴────────────────┴───┴───────────────┴───────────────┘ ``` """ dataset_name: Dataset | LiteralString suffix: Extension file_name: str bytes: int is_image: bool is_tabular: bool is_geo: bool is_topo: bool is_spatial: bool is_json: bool has_schema: bool sha: str url: str FlFieldStr: TypeAlias = Literal[ "integer", "number", "boolean", "string", "object", "array", "date", "datetime", "time", "duration", ] """ String representation of `frictionless`_ `Field Types`_. .. _frictionless: https://github.com/frictionlessdata/frictionless-py .. _Field Types: https://datapackage.org/standard/table-schema/#field-types """ ================================================ FILE: altair/expr/__init__.py ================================================ # The contents of this file are automatically written by # tools/generate_schema_wrapper.py. Do not modify directly. 
"""Tools for creating transform & filter expressions with a python syntax.""" from __future__ import annotations import sys from typing import TYPE_CHECKING, Any from altair.expr.core import ConstExpression, FunctionExpression from altair.vegalite.v6.schema.core import ExprRef as _ExprRef if sys.version_info >= (3, 12): from typing import override else: from typing_extensions import override if TYPE_CHECKING: from altair.expr.core import Expression, IntoExpression class _ExprMeta(type): """ Metaclass for :class:`expr`. Currently providing read-only class properties, representing JavaScript constants. """ @property def NaN(cls) -> Expression: """Not a number (same as JavaScript literal NaN).""" return ConstExpression("NaN") @property def LN10(cls) -> Expression: """The natural log of 10 (alias to Math.LN10).""" return ConstExpression("LN10") @property def E(cls) -> Expression: """The transcendental number e (alias to Math.E).""" return ConstExpression("E") @property def LOG10E(cls) -> Expression: """The base 10 logarithm e (alias to Math.LOG10E).""" return ConstExpression("LOG10E") @property def LOG2E(cls) -> Expression: """The base 2 logarithm of e (alias to Math.LOG2E).""" return ConstExpression("LOG2E") @property def SQRT1_2(cls) -> Expression: """The square root of 0.5 (alias to Math.SQRT1_2).""" return ConstExpression("SQRT1_2") @property def LN2(cls) -> Expression: """The natural log of 2 (alias to Math.LN2).""" return ConstExpression("LN2") @property def SQRT2(cls) -> Expression: """The square root of 2 (alias to Math.SQRT1_2).""" return ConstExpression("SQRT2") @property def PI(cls) -> Expression: """The transcendental number pi (alias to Math.PI).""" return ConstExpression("PI") class expr(_ExprRef, metaclass=_ExprMeta): """ Utility providing *constants* and *classmethods* to construct expressions. `Expressions`_ can be used to write basic formulas that enable custom interactions. Alternatively, an `inline expression`_ may be defined via :class:`expr()`. 
Parameters ---------- expr: str A `vega expression`_ string. Returns ------- ``ExprRef`` .. _Expressions: https://altair-viz.github.io/user_guide/interactions/expressions.html .. _inline expression: https://altair-viz.github.io/user_guide/interactions/expressions.html#inline-expressions .. _vega expression: https://vega.github.io/vega/docs/expressions/ Examples -------- >>> import altair as alt >>> bind_range = alt.binding_range(min=100, max=300, name="Slider value: ") >>> param_width = alt.param(bind=bind_range, name="param_width") >>> param_color = alt.param( ... expr=alt.expr.if_(param_width < 200, "red", "black"), ... name="param_color", ... ) >>> y = alt.Y("yval").axis(titleColor=param_color) >>> y Y({ axis: {'titleColor': Parameter('param_color', VariableParameter({ expr: if((param_width < 200),'red','black'), name: 'param_color' }))}, shorthand: 'yval' }) .. _Number.isNaN: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isNan .. _Number.isFinite: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isFinite .. _Math.abs: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/abs .. _Math.acos: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/acos .. _Math.asin: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/asin .. _Math.atan: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/atan .. _Math.atan2: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/atan2 .. _Math.ceil: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/ceil .. _Math.cos: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/cos .. _Math.exp: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/exp .. 
_Math.floor: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/floor .. _Math.hypot: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/hypot .. _Math.log: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/log .. _Math.max: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/max .. _Math.min: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/min .. _Math.pow: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/pow .. _Math.random: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/random .. _Math.round: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/round .. _Math.sin: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/sin .. _Math.sqrt: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/sqrt .. _Math.tan: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/tan .. _normal (Gaussian) probability distribution: https://en.wikipedia.org/wiki/Normal_distribution .. _cumulative distribution function: https://en.wikipedia.org/wiki/Cumulative_distribution_function .. _probability density function: https://en.wikipedia.org/wiki/Probability_density_function .. _log-normal probability distribution: https://en.wikipedia.org/wiki/Log-normal_distribution .. _continuous uniform probability distribution: https://en.wikipedia.org/wiki/Continuous_uniform_distribution .. _*unit*: https://vega.github.io/vega/docs/api/time/#time-units .. _ascending from Vega Utils: https://vega.github.io/vega/docs/api/util/#ascending .. _JavaScript's String.replace: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace .. 
_Base64: https://developer.mozilla.org/en-US/docs/Glossary/Base64 .. _ASCII: https://developer.mozilla.org/en-US/docs/Glossary/ASCII .. _Window.btoa(): https://developer.mozilla.org/en-US/docs/Web/API/Window/btoa .. _Window.atob(): https://developer.mozilla.org/en-US/docs/Web/API/Window/atob .. _d3-format specifier: https://github.com/d3/d3-format/ .. _*units*: https://vega.github.io/vega/docs/api/time/#time-units .. _timeUnitSpecifier API documentation: https://vega.github.io/vega/docs/api/time/#timeUnitSpecifier .. _timeFormat: https://vega.github.io/vega/docs/expressions/#timeFormat .. _utcFormat: https://vega.github.io/vega/docs/expressions/#utcFormat .. _d3-time-format specifier: https://github.com/d3/d3-time-format/ .. _TimeMultiFormat object: https://vega.github.io/vega/docs/types/#TimeMultiFormat .. _UTC: https://en.wikipedia.org/wiki/Coordinated_Universal_Time .. _JavaScript's RegExp: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp .. _RGB: https://en.wikipedia.org/wiki/RGB_color_model .. _d3-color's rgb function: https://github.com/d3/d3-color#rgb .. _HSL: https://en.wikipedia.org/wiki/HSL_and_HSV .. _d3-color's hsl function: https://github.com/d3/d3-color#hsl .. _CIE LAB: https://en.wikipedia.org/wiki/Lab_color_space#CIELAB .. _d3-color's lab function: https://github.com/d3/d3-color#lab .. _HCL: https://en.wikipedia.org/wiki/Lab_color_space#CIELAB .. _d3-color's hcl function: https://github.com/d3/d3-color#hcl .. _W3C Web Content Accessibility Guidelines: https://www.w3.org/TR/2008/REC-WCAG20-20081211/#contrast-ratiodef .. _continuous color scheme: https://vega.github.io/vega/docs/schemes .. _geoArea: https://github.com/d3/d3-geo#geoArea .. _path.area: https://github.com/d3/d3-geo#path_area .. _geoBounds: https://github.com/d3/d3-geo#geoBounds .. _path.bounds: https://github.com/d3/d3-geo#path_bounds .. _geoCentroid: https://github.com/d3/d3-geo#geoCentroid .. 
_path.centroid: https://github.com/d3/d3-geo#path_centroid .. _window.screen: https://developer.mozilla.org/en-US/docs/Web/API/Window/screen """ @override def __new__(cls: type[_ExprRef], expr: str) -> _ExprRef: # type: ignore[misc] return _ExprRef(expr=expr) @classmethod def isArray(cls, value: IntoExpression, /) -> Expression: """Returns true if ``value`` is an array, false otherwise.""" return FunctionExpression("isArray", (value,)) @classmethod def isBoolean(cls, value: IntoExpression, /) -> Expression: """Returns true if ``value`` is a boolean (``true`` or ``false``), false otherwise.""" return FunctionExpression("isBoolean", (value,)) @classmethod def isDate(cls, value: IntoExpression, /) -> Expression: """ Returns true if ``value`` is a Date object, false otherwise. This method will return false for timestamp numbers or date-formatted strings; it recognizes Date objects only. """ return FunctionExpression("isDate", (value,)) @classmethod def isDefined(cls, value: IntoExpression, /) -> Expression: """ Returns true if ``value`` is a defined value, false if ``value`` equals ``undefined``. This method will return true for ``null`` and ``NaN`` values. """ return FunctionExpression("isDefined", (value,)) @classmethod def isNumber(cls, value: IntoExpression, /) -> Expression: """ Returns true if ``value`` is a number, false otherwise. ``NaN`` and ``Infinity`` are considered numbers. 
""" return FunctionExpression("isNumber", (value,)) @classmethod def isObject(cls, value: IntoExpression, /) -> Expression: """Returns true if ``value`` is an object (including arrays and Dates), false otherwise.""" return FunctionExpression("isObject", (value,)) @classmethod def isRegExp(cls, value: IntoExpression, /) -> Expression: """Returns true if ``value`` is a RegExp (regular expression) object, false otherwise.""" return FunctionExpression("isRegExp", (value,)) @classmethod def isString(cls, value: IntoExpression, /) -> Expression: """Returns true if ``value`` is a string, false otherwise.""" return FunctionExpression("isString", (value,)) @classmethod def isValid(cls, value: IntoExpression, /) -> Expression: """Returns true if ``value`` is not ``null``, ``undefined``, or ``NaN``, false otherwise.""" return FunctionExpression("isValid", (value,)) @classmethod def toBoolean(cls, value: IntoExpression, /) -> Expression: """ Coerces the input ``value`` to a string. Null values and empty strings are mapped to ``null``. """ return FunctionExpression("toBoolean", (value,)) @classmethod def toDate(cls, value: IntoExpression, /) -> Expression: """ Coerces the input ``value`` to a Date instance. Null values and empty strings are mapped to ``null``. If an optional *parser* function is provided, it is used to perform date parsing, otherwise ``Date.parse`` is used. Be aware that ``Date.parse`` has different implementations across browsers! """ return FunctionExpression("toDate", (value,)) @classmethod def toNumber(cls, value: IntoExpression, /) -> Expression: """ Coerces the input ``value`` to a number. Null values and empty strings are mapped to ``null``. """ return FunctionExpression("toNumber", (value,)) @classmethod def toString(cls, value: IntoExpression, /) -> Expression: """ Coerces the input ``value`` to a string. Null values and empty strings are mapped to ``null``. 
""" return FunctionExpression("toString", (value,)) @classmethod def if_( cls, test: IntoExpression, thenValue: IntoExpression, elseValue: IntoExpression, /, ) -> Expression: """ If ``test`` is truthy, returns ``thenValue``. Otherwise, returns ``elseValue``. The *if* function is equivalent to the ternary operator ``a ? b : c``. """ return FunctionExpression("if", (test, thenValue, elseValue)) @classmethod def isNaN(cls, value: IntoExpression, /) -> Expression: """ Returns true if ``value`` is not a number. Same as JavaScript's `Number.isNaN`_. .. _Number.isNaN: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isNan """ return FunctionExpression("isNaN", (value,)) @classmethod def isFinite(cls, value: IntoExpression, /) -> Expression: """ Returns true if ``value`` is a finite number. Same as JavaScript's `Number.isFinite`_. .. _Number.isFinite: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/isFinite """ return FunctionExpression("isFinite", (value,)) @classmethod def abs(cls, value: IntoExpression, /) -> Expression: """ Returns the absolute value of ``value``. Same as JavaScript's `Math.abs`_. .. _Math.abs: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/abs """ return FunctionExpression("abs", (value,)) @classmethod def acos(cls, value: IntoExpression, /) -> Expression: """ Trigonometric arccosine. Same as JavaScript's `Math.acos`_. .. _Math.acos: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/acos """ return FunctionExpression("acos", (value,)) @classmethod def asin(cls, value: IntoExpression, /) -> Expression: """ Trigonometric arcsine. Same as JavaScript's `Math.asin`_. .. 
_Math.asin: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/asin """ return FunctionExpression("asin", (value,)) @classmethod def atan(cls, value: IntoExpression, /) -> Expression: """ Trigonometric arctangent. Same as JavaScript's `Math.atan`_. .. _Math.atan: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/atan """ return FunctionExpression("atan", (value,)) @classmethod def atan2(cls, dy: IntoExpression, dx: IntoExpression, /) -> Expression: """ Returns the arctangent of *dy / dx*. Same as JavaScript's `Math.atan2`_. .. _Math.atan2: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/atan2 """ return FunctionExpression("atan2", (dy, dx)) @classmethod def ceil(cls, value: IntoExpression, /) -> Expression: """ Rounds ``value`` to the nearest integer of equal or greater value. Same as JavaScript's `Math.ceil`_. .. _Math.ceil: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/ceil """ return FunctionExpression("ceil", (value,)) @classmethod def clamp( cls, value: IntoExpression, min: IntoExpression, max: IntoExpression, / ) -> Expression: """Restricts ``value`` to be between the specified ``min`` and ``max``.""" return FunctionExpression("clamp", (value, min, max)) @classmethod def cos(cls, value: IntoExpression, /) -> Expression: """ Trigonometric cosine. Same as JavaScript's `Math.cos`_. .. _Math.cos: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/cos """ return FunctionExpression("cos", (value,)) @classmethod def exp(cls, exponent: IntoExpression, /) -> Expression: """ Returns the value of *e* raised to the provided ``exponent``. Same as JavaScript's `Math.exp`_. .. 
_Math.exp: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/exp """ return FunctionExpression("exp", (exponent,)) @classmethod def floor(cls, value: IntoExpression, /) -> Expression: """ Rounds ``value`` to the nearest integer of equal or lower value. Same as JavaScript's `Math.floor`_. .. _Math.floor: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/floor """ return FunctionExpression("floor", (value,)) @classmethod def hypot(cls, value: IntoExpression, /) -> Expression: """ Returns the square root of the sum of squares of its arguments. Same as JavaScript's `Math.hypot`_. .. _Math.hypot: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/hypot """ return FunctionExpression("hypot", (value,)) @classmethod def log(cls, value: IntoExpression, /) -> Expression: """ Returns the natural logarithm of ``value``. Same as JavaScript's `Math.log`_. .. _Math.log: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/log """ return FunctionExpression("log", (value,)) @classmethod def max( cls, value1: IntoExpression, value2: IntoExpression, *args: Any ) -> Expression: """ Returns the maximum argument value. Same as JavaScript's `Math.max`_. .. _Math.max: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/max """ return FunctionExpression("max", (value1, value2, *args)) @classmethod def min( cls, value1: IntoExpression, value2: IntoExpression, *args: Any ) -> Expression: """ Returns the minimum argument value. Same as JavaScript's `Math.min`_. .. _Math.min: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/min """ return FunctionExpression("min", (value1, value2, *args)) @classmethod def pow(cls, value: IntoExpression, exponent: IntoExpression, /) -> Expression: """ Returns ``value`` raised to the given ``exponent``. Same as JavaScript's `Math.pow`_. .. 
_Math.pow: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/pow """ return FunctionExpression("pow", (value, exponent)) @classmethod def random(cls) -> Expression: """ Returns a pseudo-random number in the range [0,1). Same as JavaScript's `Math.random`_. .. _Math.random: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/random """ return FunctionExpression("random", ()) @classmethod def round(cls, value: IntoExpression, /) -> Expression: """ Rounds ``value`` to the nearest integer. Same as JavaScript's `Math.round`_. .. _Math.round: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/round """ return FunctionExpression("round", (value,)) @classmethod def sin(cls, value: IntoExpression, /) -> Expression: """ Trigonometric sine. Same as JavaScript's `Math.sin`_. .. _Math.sin: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/sin """ return FunctionExpression("sin", (value,)) @classmethod def sqrt(cls, value: IntoExpression, /) -> Expression: """ Square root function. Same as JavaScript's `Math.sqrt`_. .. _Math.sqrt: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/sqrt """ return FunctionExpression("sqrt", (value,)) @classmethod def tan(cls, value: IntoExpression, /) -> Expression: """ Trigonometric tangent. Same as JavaScript's `Math.tan`_. .. _Math.tan: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/tan """ return FunctionExpression("tan", (value,)) @classmethod def sampleNormal( cls, mean: IntoExpression = None, stdev: IntoExpression = None, / ) -> Expression: """ Returns a sample from a univariate `normal (Gaussian) probability distribution`_ with specified ``mean`` and standard deviation ``stdev``. If unspecified, the mean defaults to ``0`` and the standard deviation defaults to ``1``. .. 
_normal (Gaussian) probability distribution: https://en.wikipedia.org/wiki/Normal_distribution """ return FunctionExpression("sampleNormal", (mean, stdev)) @classmethod def cumulativeNormal( cls, value: IntoExpression, mean: IntoExpression = None, stdev: IntoExpression = None, /, ) -> Expression: """ Returns the value of the `cumulative distribution function`_ at the given input domain ``value`` for a normal distribution with specified ``mean`` and standard deviation ``stdev``. If unspecified, the mean defaults to ``0`` and the standard deviation defaults to ``1``. .. _cumulative distribution function: https://en.wikipedia.org/wiki/Cumulative_distribution_function """ return FunctionExpression("cumulativeNormal", (value, mean, stdev)) @classmethod def densityNormal( cls, value: IntoExpression, mean: IntoExpression = None, stdev: IntoExpression = None, /, ) -> Expression: """ Returns the value of the `probability density function`_ at the given input domain ``value``, for a normal distribution with specified ``mean`` and standard deviation ``stdev``. If unspecified, the mean defaults to ``0`` and the standard deviation defaults to ``1``. .. _probability density function: https://en.wikipedia.org/wiki/Probability_density_function """ return FunctionExpression("densityNormal", (value, mean, stdev)) @classmethod def quantileNormal( cls, probability: IntoExpression, mean: IntoExpression = None, stdev: IntoExpression = None, /, ) -> Expression: """ Returns the quantile value (the inverse of the `cumulative distribution function`_) for the given input ``probability``, for a normal distribution with specified ``mean`` and standard deviation ``stdev``. If unspecified, the mean defaults to ``0`` and the standard deviation defaults to ``1``. .. 
_cumulative distribution function: https://en.wikipedia.org/wiki/Cumulative_distribution_function """ return FunctionExpression("quantileNormal", (probability, mean, stdev)) @classmethod def sampleLogNormal( cls, mean: IntoExpression = None, stdev: IntoExpression = None, / ) -> Expression: """ Returns a sample from a univariate `log-normal probability distribution`_ with specified log ``mean`` and log standard deviation ``stdev``. If unspecified, the log mean defaults to ``0`` and the log standard deviation defaults to ``1``. .. _log-normal probability distribution: https://en.wikipedia.org/wiki/Log-normal_distribution """ return FunctionExpression("sampleLogNormal", (mean, stdev)) @classmethod def cumulativeLogNormal( cls, value: IntoExpression, mean: IntoExpression = None, stdev: IntoExpression = None, /, ) -> Expression: """ Returns the value of the `cumulative distribution function`_ at the given input domain ``value`` for a log-normal distribution with specified log ``mean`` and log standard deviation ``stdev``. If unspecified, the log mean defaults to ``0`` and the log standard deviation defaults to ``1``. .. _cumulative distribution function: https://en.wikipedia.org/wiki/Cumulative_distribution_function """ return FunctionExpression("cumulativeLogNormal", (value, mean, stdev)) @classmethod def densityLogNormal( cls, value: IntoExpression, mean: IntoExpression = None, stdev: IntoExpression = None, /, ) -> Expression: """ Returns the value of the `probability density function`_ at the given input domain ``value``, for a log-normal distribution with specified log ``mean`` and log standard deviation ``stdev``. If unspecified, the log mean defaults to ``0`` and the log standard deviation defaults to ``1``. .. 
_probability density function: https://en.wikipedia.org/wiki/Probability_density_function """ return FunctionExpression("densityLogNormal", (value, mean, stdev)) @classmethod def quantileLogNormal( cls, probability: IntoExpression, mean: IntoExpression = None, stdev: IntoExpression = None, /, ) -> Expression: """ Returns the quantile value (the inverse of the `cumulative distribution function`_) for the given input ``probability``, for a log-normal distribution with specified log ``mean`` and log standard deviation ``stdev``. If unspecified, the log mean defaults to ``0`` and the log standard deviation defaults to ``1``. .. _cumulative distribution function: https://en.wikipedia.org/wiki/Cumulative_distribution_function """ return FunctionExpression("quantileLogNormal", (probability, mean, stdev)) @classmethod def sampleUniform( cls, min: IntoExpression = None, max: IntoExpression = None, / ) -> Expression: """ Returns a sample from a univariate `continuous uniform probability distribution`_ over the interval [``min``, ``max``). If unspecified, ``min`` defaults to ``0`` and ``max`` defaults to ``1``. If only one argument is provided, it is interpreted as the ``max`` value. .. _continuous uniform probability distribution: https://en.wikipedia.org/wiki/Continuous_uniform_distribution """ return FunctionExpression("sampleUniform", (min, max)) @classmethod def cumulativeUniform( cls, value: IntoExpression, min: IntoExpression = None, max: IntoExpression = None, /, ) -> Expression: """ Returns the value of the `cumulative distribution function`_ at the given input domain ``value`` for a uniform distribution over the interval [``min``, ``max``). If unspecified, ``min`` defaults to ``0`` and ``max`` defaults to ``1``. If only one argument is provided, it is interpreted as the ``max`` value. .. 
_cumulative distribution function: https://en.wikipedia.org/wiki/Cumulative_distribution_function """ return FunctionExpression("cumulativeUniform", (value, min, max)) @classmethod def densityUniform( cls, value: IntoExpression, min: IntoExpression = None, max: IntoExpression = None, /, ) -> Expression: """ Returns the value of the `probability density function`_ at the given input domain ``value``, for a uniform distribution over the interval [``min``, ``max``). If unspecified, ``min`` defaults to ``0`` and ``max`` defaults to ``1``. If only one argument is provided, it is interpreted as the ``max`` value. .. _probability density function: https://en.wikipedia.org/wiki/Probability_density_function """ return FunctionExpression("densityUniform", (value, min, max)) @classmethod def quantileUniform( cls, probability: IntoExpression, min: IntoExpression = None, max: IntoExpression = None, /, ) -> Expression: """ Returns the quantile value (the inverse of the `cumulative distribution function`_) for the given input ``probability``, for a uniform distribution over the interval [``min``, ``max``). If unspecified, ``min`` defaults to ``0`` and ``max`` defaults to ``1``. If only one argument is provided, it is interpreted as the ``max`` value. .. _cumulative distribution function: https://en.wikipedia.org/wiki/Cumulative_distribution_function """ return FunctionExpression("quantileUniform", (probability, min, max)) @classmethod def now(cls) -> Expression: """Returns the timestamp for the current time.""" return FunctionExpression("now", ()) @classmethod def datetime( cls, year: IntoExpression, month: IntoExpression, day: IntoExpression = None, hour: IntoExpression = None, min: IntoExpression = None, sec: IntoExpression = None, millisec: IntoExpression = None, /, ) -> Expression: """ Returns a new ``Date`` instance. The ``month`` is 0-based, such that ``1`` represents February. 
""" return FunctionExpression( "datetime", (year, month, day, hour, min, sec, millisec) ) @classmethod def date(cls, datetime: IntoExpression, /) -> Expression: """Returns the day of the month for the given ``datetime`` value, in local time.""" return FunctionExpression("date", (datetime,)) @classmethod def day(cls, datetime: IntoExpression, /) -> Expression: """Returns the day of the week for the given ``datetime`` value, in local time.""" return FunctionExpression("day", (datetime,)) @classmethod def dayofyear(cls, datetime: IntoExpression, /) -> Expression: """Returns the one-based day of the year for the given ``datetime`` value, in local time.""" return FunctionExpression("dayofyear", (datetime,)) @classmethod def year(cls, datetime: IntoExpression, /) -> Expression: """Returns the year for the given ``datetime`` value, in local time.""" return FunctionExpression("year", (datetime,)) @classmethod def quarter(cls, datetime: IntoExpression, /) -> Expression: """Returns the quarter of the year (0-3) for the given ``datetime`` value, in local time.""" return FunctionExpression("quarter", (datetime,)) @classmethod def month(cls, datetime: IntoExpression, /) -> Expression: """Returns the (zero-based) month for the given ``datetime`` value, in local time.""" return FunctionExpression("month", (datetime,)) @classmethod def week(cls, date: IntoExpression, /) -> Expression: """ Returns the week number of the year for the given *datetime*, in local time. This function assumes Sunday-based weeks. Days before the first Sunday of the year are considered to be in week 0, the first Sunday of the year is the start of week 1, the second Sunday week 2, *etc.*. 
""" return FunctionExpression("week", (date,)) @classmethod def hours(cls, datetime: IntoExpression, /) -> Expression: """Returns the hours component for the given ``datetime`` value, in local time.""" return FunctionExpression("hours", (datetime,)) @classmethod def minutes(cls, datetime: IntoExpression, /) -> Expression: """Returns the minutes component for the given ``datetime`` value, in local time.""" return FunctionExpression("minutes", (datetime,)) @classmethod def seconds(cls, datetime: IntoExpression, /) -> Expression: """Returns the seconds component for the given ``datetime`` value, in local time.""" return FunctionExpression("seconds", (datetime,)) @classmethod def milliseconds(cls, datetime: IntoExpression, /) -> Expression: """Returns the milliseconds component for the given ``datetime`` value, in local time.""" return FunctionExpression("milliseconds", (datetime,)) @classmethod def time(cls, datetime: IntoExpression, /) -> Expression: """Returns the epoch-based timestamp for the given ``datetime`` value.""" return FunctionExpression("time", (datetime,)) @classmethod def timezoneoffset(cls, datetime: IntoExpression, /) -> Expression: """Returns the timezone offset from the local timezone to UTC for the given ``datetime`` value.""" return FunctionExpression("timezoneoffset", (datetime,)) @classmethod def timeOffset( cls, unit: IntoExpression, date: IntoExpression, step: IntoExpression = None, / ) -> Expression: """ Returns a new ``Date`` instance that offsets the given ``date`` by the specified time `*unit*`_ in the local timezone. The optional ``step`` argument indicates the number of time unit steps to offset by (default 1). .. 
_*unit*: https://vega.github.io/vega/docs/api/time/#time-units """ return FunctionExpression("timeOffset", (unit, date, step)) @classmethod def timeSequence( cls, unit: IntoExpression, start: IntoExpression, stop: IntoExpression, step: IntoExpression = None, /, ) -> Expression: """ Returns an array of ``Date`` instances from ``start`` (inclusive) to ``stop`` (exclusive), with each entry separated by the given time `*unit*`_ in the local timezone. The optional ``step`` argument indicates the number of time unit steps to take between each sequence entry (default 1). .. _*unit*: https://vega.github.io/vega/docs/api/time/#time-units """ return FunctionExpression("timeSequence", (unit, start, stop, step)) @classmethod def utc( cls, year: IntoExpression, month: IntoExpression, day: IntoExpression = None, hour: IntoExpression = None, min: IntoExpression = None, sec: IntoExpression = None, millisec: IntoExpression = None, /, ) -> Expression: """ Returns a timestamp for the given UTC date. The ``month`` is 0-based, such that ``1`` represents February. 
""" return FunctionExpression("utc", (year, month, day, hour, min, sec, millisec)) @classmethod def utcdate(cls, datetime: IntoExpression, /) -> Expression: """Returns the day of the month for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcdate", (datetime,)) @classmethod def utcday(cls, datetime: IntoExpression, /) -> Expression: """Returns the day of the week for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcday", (datetime,)) @classmethod def utcdayofyear(cls, datetime: IntoExpression, /) -> Expression: """Returns the one-based day of the year for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcdayofyear", (datetime,)) @classmethod def utcyear(cls, datetime: IntoExpression, /) -> Expression: """Returns the year for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcyear", (datetime,)) @classmethod def utcquarter(cls, datetime: IntoExpression, /) -> Expression: """Returns the quarter of the year (0-3) for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcquarter", (datetime,)) @classmethod def utcmonth(cls, datetime: IntoExpression, /) -> Expression: """Returns the (zero-based) month for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcmonth", (datetime,)) @classmethod def utcweek(cls, date: IntoExpression, /) -> Expression: """ Returns the week number of the year for the given *datetime*, in UTC time. This function assumes Sunday-based weeks. Days before the first Sunday of the year are considered to be in week 0, the first Sunday of the year is the start of week 1, the second Sunday week 2, *etc.*. 
""" return FunctionExpression("utcweek", (date,)) @classmethod def utchours(cls, datetime: IntoExpression, /) -> Expression: """Returns the hours component for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utchours", (datetime,)) @classmethod def utcminutes(cls, datetime: IntoExpression, /) -> Expression: """Returns the minutes component for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcminutes", (datetime,)) @classmethod def utcseconds(cls, datetime: IntoExpression, /) -> Expression: """Returns the seconds component for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcseconds", (datetime,)) @classmethod def utcmilliseconds(cls, datetime: IntoExpression, /) -> Expression: """Returns the milliseconds component for the given ``datetime`` value, in UTC time.""" return FunctionExpression("utcmilliseconds", (datetime,)) @classmethod def utcOffset( cls, unit: IntoExpression, date: IntoExpression, step: IntoExpression = None, / ) -> Expression: """ Returns a new ``Date`` instance that offsets the given ``date`` by the specified time `*unit*`_ in UTC time. The optional ``step`` argument indicates the number of time unit steps to offset by (default 1). .. _*unit*: https://vega.github.io/vega/docs/api/time/#time-units """ return FunctionExpression("utcOffset", (unit, date, step)) @classmethod def utcSequence( cls, unit: IntoExpression, start: IntoExpression, stop: IntoExpression, step: IntoExpression = None, /, ) -> Expression: """ Returns an array of ``Date`` instances from ``start`` (inclusive) to ``stop`` (exclusive), with each entry separated by the given time `*unit*`_ in UTC time. The optional ``step`` argument indicates the number of time unit steps to take between each sequence entry (default 1). .. 
_*unit*: https://vega.github.io/vega/docs/api/time/#time-units """ return FunctionExpression("utcSequence", (unit, start, stop, step)) @classmethod def extent(cls, array: IntoExpression, /) -> Expression: """Returns a new *[min, max]* array with the minimum and maximum values of the input array, ignoring ``null``, ``undefined``, and ``NaN`` values.""" return FunctionExpression("extent", (array,)) @classmethod def clampRange( cls, range: IntoExpression, min: IntoExpression, max: IntoExpression, / ) -> Expression: """ Clamps a two-element ``range`` array in a span-preserving manner. If the span of the input ``range`` is less than *(max - min)* and an endpoint exceeds either the ``min`` or ``max`` value, the range is translated such that the span is preserved and one endpoint touches the boundary of the *[min, max]* range. If the span exceeds *(max - min)*, the range *[min, max]* is returned. """ return FunctionExpression("clampRange", (range, min, max)) @classmethod def indexof(cls, array: IntoExpression, value: IntoExpression, /) -> Expression: """Returns the first index of ``value`` in the input ``array``.""" return FunctionExpression("indexof", (array, value)) @classmethod def inrange(cls, value: IntoExpression, range: IntoExpression, /) -> Expression: """Tests whether ``value`` lies within (or is equal to either) the first and last values of the ``range`` array.""" return FunctionExpression("inrange", (value, range)) @classmethod def join( cls, array: IntoExpression, separator: IntoExpression = None, / ) -> Expression: """Returns a new string by concatenating all of the elements of the input ``array``, separated by commas or a specified ``separator`` string.""" return FunctionExpression("join", (array, separator)) @classmethod def lastindexof(cls, array: IntoExpression, value: IntoExpression, /) -> Expression: """Returns the last index of ``value`` in the input ``array``.""" return FunctionExpression("lastindexof", (array, value)) @classmethod def length(cls, 
array: IntoExpression, /) -> Expression: """Returns the length of the input ``array``.""" return FunctionExpression("length", (array,)) @classmethod def lerp(cls, array: IntoExpression, fraction: IntoExpression, /) -> Expression: """ Returns the linearly interpolated value between the first and last entries in the ``array`` for the provided interpolation ``fraction`` (typically between 0 and 1). For example, ``alt.expr.lerp([0, 50], 0.5)`` returns 25. """ return FunctionExpression("lerp", (array, fraction)) @classmethod def peek(cls, array: IntoExpression, /) -> Expression: """ Returns the last element in the input ``array``. Similar to the built-in ``Array.pop`` method, except that it does not remove the last element. This method is a convenient shorthand for ``array[array.length - 1]``. """ return FunctionExpression("peek", (array,)) @classmethod def pluck(cls, array: IntoExpression, field: IntoExpression, /) -> Expression: """ Retrieves the value for the specified ``field`` from a given ``array`` of objects. The input ``field`` string may include nested properties (e.g., ``foo.bar.bz``). """ return FunctionExpression("pluck", (array, field)) @classmethod def reverse(cls, array: IntoExpression, /) -> Expression: """ Returns a new array with elements in a reverse order of the input ``array``. The first array element becomes the last, and the last array element becomes the first. """ return FunctionExpression("reverse", (array,)) @classmethod def sequence(cls, *args: Any) -> Expression: """ Returns an array containing an arithmetic sequence of numbers. If ``step`` is omitted, it defaults to 1. If ``start`` is omitted, it defaults to 0. The ``stop`` value is exclusive; it is not included in the result. If ``step`` is positive, the last element is the largest *start + i * step* less than ``stop``; if ``step`` is negative, the last element is the smallest *start + i * step* greater than ``stop``. 
If the returned array would contain an infinite number of values, an empty range is returned. The arguments are not required to be integers. """ return FunctionExpression("sequence", args) @classmethod def slice( cls, array: IntoExpression, start: IntoExpression, end: IntoExpression = None, / ) -> Expression: """ Returns a section of ``array`` between the ``start`` and ``end`` indices. If the ``end`` argument is negative, it is treated as an offset from the end of the array (*alt.expr.length(array) + end*). """ return FunctionExpression("slice", (array, start, end)) @classmethod def sort(cls, array: IntoExpression, /) -> Expression: """ Sorts the array in natural order using `ascending from Vega Utils`_. .. _ascending from Vega Utils: https://vega.github.io/vega/docs/api/util/#ascending """ return FunctionExpression("sort", (array,)) @classmethod def span(cls, array: IntoExpression, /) -> Expression: """Returns the span of ``array``: the difference between the last and first elements, or *array[array.length-1] - array[0]*.""" return FunctionExpression("span", (array,)) @classmethod def lower(cls, string: IntoExpression, /) -> Expression: """Transforms ``string`` to lower-case letters.""" return FunctionExpression("lower", (string,)) @classmethod def pad( cls, string: IntoExpression, length: IntoExpression, character: IntoExpression = None, align: IntoExpression = None, /, ) -> Expression: """ Pads a ``string`` value with repeated instances of a ``character`` up to a specified ``length``. If ``character`` is not specified, a space (' ') is used. By default, padding is added to the end of a string. An optional ``align`` parameter specifies if padding should be added to the ``'left'`` (beginning), ``'center'``, or ``'right'`` (end) of the input string. """ return FunctionExpression("pad", (string, length, character, align)) @classmethod def parseFloat(cls, string: IntoExpression, /) -> Expression: """ Parses the input ``string`` to a floating-point value. 
Same as JavaScript's ``parseFloat``.
        """
        return FunctionExpression("parseFloat", (string,))

    @classmethod
    def parseInt(cls, string: IntoExpression, /) -> Expression:
        """
        Parses the input ``string`` to an integer value.

        Same as JavaScript's ``parseInt``.
        """
        return FunctionExpression("parseInt", (string,))

    @classmethod
    def replace(
        cls,
        string: IntoExpression,
        pattern: IntoExpression,
        replacement: IntoExpression,
        /,
    ) -> Expression:
        """
        Returns a new string with some or all matches of ``pattern`` replaced by a ``replacement`` string.

        The ``pattern`` can be a string or a regular expression. If ``pattern`` is a string, only the first instance will be replaced. Same as `JavaScript's String.replace`_.

        .. _JavaScript's String.replace: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
        """
        return FunctionExpression("replace", (string, pattern, replacement))

    @classmethod
    def substring(
        cls,
        string: IntoExpression,
        start: IntoExpression,
        end: IntoExpression = None,
        /,
    ) -> Expression:
        """Returns a section of ``string`` between the ``start`` and ``end`` indices."""
        # NOTE(review): when ``end`` is omitted, ``None`` is still forwarded as the third
        # argument -- confirm the rendered expression treats it like an omitted ``end``.
        return FunctionExpression("substring", (string, start, end))

    @classmethod
    def trim(cls, string: IntoExpression, /) -> Expression:
        """Returns a trimmed string with preceding and trailing whitespace removed."""
        return FunctionExpression("trim", (string,))

    @classmethod
    def truncate(
        cls,
        string: IntoExpression,
        length: IntoExpression,
        align: IntoExpression = None,
        ellipsis: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Truncates an input ``string`` to a target ``length``.

        The optional ``align`` argument indicates what part of the string should be truncated: ``'left'`` (the beginning), ``'center'``, or ``'right'`` (the end). By default, the ``'right'`` end of the string is truncated. The optional ``ellipsis`` argument indicates the string to use to indicate truncated content; by default the ellipsis character ``…`` (``\\u2026``) is used.
        """
        return FunctionExpression("truncate", (string, length, align, ellipsis))

    @classmethod
    def upper(cls, string: IntoExpression, /) -> Expression:
        """Transforms ``string`` to upper-case letters."""
        return FunctionExpression("upper", (string,))

    @classmethod
    def btoa(cls, string: IntoExpression, /) -> Expression:
        """
        Creates a `Base64`_-encoded `ASCII`_ string.

        Same as JavaScript's `Window.btoa()`_.

        .. _Base64: https://developer.mozilla.org/en-US/docs/Glossary/Base64
        .. _ASCII: https://developer.mozilla.org/en-US/docs/Glossary/ASCII
        .. _Window.btoa(): https://developer.mozilla.org/en-US/docs/Web/API/Window/btoa
        """
        return FunctionExpression("btoa", (string,))

    @classmethod
    def atob(cls, string: IntoExpression, /) -> Expression:
        """
        Decodes an `ASCII`_ string that was encoded with `Base64`_.

        Same as JavaScript's `Window.atob()`_.

        .. _ASCII: https://developer.mozilla.org/en-US/docs/Glossary/ASCII
        .. _Base64: https://developer.mozilla.org/en-US/docs/Glossary/Base64
        .. _Window.atob(): https://developer.mozilla.org/en-US/docs/Web/API/Window/atob
        """
        return FunctionExpression("atob", (string,))

    @classmethod
    def merge(
        cls, object1: IntoExpression, object2: IntoExpression = None, *args: Any
    ) -> Expression:
        """
        Merges the input objects ``object1``, ``object2``, etc into a new output object.

        Inputs are visited in sequential order, such that key values from later arguments can overwrite those from earlier arguments. Example: ``alt.expr.merge({a:1, b:2}, {a:3}) -> {a:3, b:2}``.
        """
        return FunctionExpression("merge", (object1, object2, *args))

    @classmethod
    def dayFormat(cls, day: IntoExpression, /) -> Expression:
        """
        Formats a (0-6) *weekday* number as a full week day name, according to the current locale.

        For example: ``alt.expr.dayFormat(0) -> "Sunday"``.
        """
        return FunctionExpression("dayFormat", (day,))

    @classmethod
    def dayAbbrevFormat(cls, day: IntoExpression, /) -> Expression:
        """
        Formats a (0-6) *weekday* number as an abbreviated week day name, according to the current locale.

        For example: ``alt.expr.dayAbbrevFormat(0) -> "Sun"``.
        """
        return FunctionExpression("dayAbbrevFormat", (day,))

    @classmethod
    def format(cls, value: IntoExpression, specifier: IntoExpression, /) -> Expression:
        """
        Formats a numeric ``value`` as a string.

        The ``specifier`` must be a valid `d3-format specifier`_ (e.g., ``alt.expr.format(value, ',.2f')``). Null values are formatted as ``"null"``.

        .. _d3-format specifier: https://github.com/d3/d3-format/
        """
        return FunctionExpression("format", (value, specifier))

    @classmethod
    def monthFormat(cls, month: IntoExpression, /) -> Expression:
        """
        Formats a (zero-based) ``month`` number as a full month name, according to the current locale.

        For example: ``alt.expr.monthFormat(0) -> "January"``.
        """
        return FunctionExpression("monthFormat", (month,))

    @classmethod
    def monthAbbrevFormat(cls, month: IntoExpression, /) -> Expression:
        """
        Formats a (zero-based) ``month`` number as an abbreviated month name, according to the current locale.

        For example: ``alt.expr.monthAbbrevFormat(0) -> "Jan"``.
        """
        return FunctionExpression("monthAbbrevFormat", (month,))

    @classmethod
    def timeUnitSpecifier(
        cls, units: IntoExpression, specifiers: IntoExpression = None, /
    ) -> Expression:
        """
        Returns a time format specifier string for the given time `*units*`_.

        The optional ``specifiers`` object provides a set of specifier sub-strings for customizing the format; for more, see the `timeUnitSpecifier API documentation`_. The resulting specifier string can then be used as input to the `timeFormat`_ or `utcFormat`_ functions, or as the *format* parameter of an axis or legend. For example: ``alt.expr.timeFormat(date, alt.expr.timeUnitSpecifier('year'))`` or ``alt.expr.timeFormat(date, alt.expr.timeUnitSpecifier(['hours', 'minutes']))``.

        .. _*units*: https://vega.github.io/vega/docs/api/time/#time-units
        .. _timeUnitSpecifier API documentation: https://vega.github.io/vega/docs/api/time/#timeUnitSpecifier
        .. _timeFormat: https://vega.github.io/vega/docs/expressions/#timeFormat
        .. _utcFormat: https://vega.github.io/vega/docs/expressions/#utcFormat
        """
        return FunctionExpression("timeUnitSpecifier", (units, specifiers))

    @classmethod
    def timeFormat(
        cls, value: IntoExpression, specifier: IntoExpression, /
    ) -> Expression:
        """
        Formats a datetime ``value`` (either a ``Date`` object or timestamp) as a string, according to the local time.

        The ``specifier`` must be a valid `d3-time-format specifier`_ or `TimeMultiFormat object`_. For example: ``alt.expr.timeFormat(timestamp, '%A')``. Null values are formatted as ``"null"``.

        .. _d3-time-format specifier: https://github.com/d3/d3-time-format/
        .. _TimeMultiFormat object: https://vega.github.io/vega/docs/types/#TimeMultiFormat
        """
        return FunctionExpression("timeFormat", (value, specifier))

    @classmethod
    def timeParse(
        cls, string: IntoExpression, specifier: IntoExpression, /
    ) -> Expression:
        """
        Parses a ``string`` value to a Date object, according to the local time.

        The ``specifier`` must be a valid `d3-time-format specifier`_. For example: ``alt.expr.timeParse('June 30, 2015', '%B %d, %Y')``.

        .. _d3-time-format specifier: https://github.com/d3/d3-time-format/
        """
        return FunctionExpression("timeParse", (string, specifier))

    @classmethod
    def utcFormat(
        cls, value: IntoExpression, specifier: IntoExpression, /
    ) -> Expression:
        """
        Formats a datetime ``value`` (either a ``Date`` object or timestamp) as a string, according to `UTC`_ time.

        The ``specifier`` must be a valid `d3-time-format specifier`_ or `TimeMultiFormat object`_. For example: ``alt.expr.utcFormat(timestamp, '%A')``.
Null values are formatted as ``"null"``.

        .. _UTC: https://en.wikipedia.org/wiki/Coordinated_Universal_Time
        .. _d3-time-format specifier: https://github.com/d3/d3-time-format/
        .. _TimeMultiFormat object: https://vega.github.io/vega/docs/types/#TimeMultiFormat
        """
        return FunctionExpression("utcFormat", (value, specifier))

    @classmethod
    def utcParse(
        cls, value: IntoExpression, specifier: IntoExpression, /
    ) -> Expression:
        """
        Parses a string ``value`` to a Date object, according to `UTC`_ time.

        The ``specifier`` must be a valid `d3-time-format specifier`_. For example: ``alt.expr.utcParse('June 30, 2015', '%B %d, %Y')``.

        .. _UTC: https://en.wikipedia.org/wiki/Coordinated_Universal_Time
        .. _d3-time-format specifier: https://github.com/d3/d3-time-format/
        """
        return FunctionExpression("utcParse", (value, specifier))

    @classmethod
    def regexp(
        cls, pattern: IntoExpression, flags: IntoExpression = None, /
    ) -> Expression:
        """
        Creates a regular expression instance from an input ``pattern`` string and optional ``flags``.

        Same as `JavaScript's RegExp`_.

        .. _JavaScript's RegExp: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
        """
        return FunctionExpression("regexp", (pattern, flags))

    @classmethod
    def test(
        cls, regexp: IntoExpression, string: IntoExpression = None, /
    ) -> Expression:
        r"""
        Evaluates a regular expression ``regexp`` against the input ``string``, returning ``true`` if the string matches the pattern, ``false`` otherwise.

        For example: ``alt.expr.test(/\\d{3}/, "32-21-9483") -> true``.
        """
        # NOTE(review): ``string`` is optional in this signature although the docstring
        # describes it as the input being matched -- confirm the default is intended.
        return FunctionExpression("test", (regexp, string))

    @classmethod
    def rgb(cls, *args: Any) -> Expression:
        """
        Constructs a new `RGB`_ color.

        If ``r``, ``g`` and ``b`` are specified, these represent the channel values of the returned color; an ``opacity`` may also be specified. If a CSS Color Module Level 3 *specifier* string is specified, it is parsed and then converted to the RGB color space. Uses `d3-color's rgb function`_.

        .. _RGB: https://en.wikipedia.org/wiki/RGB_color_model
        .. _d3-color's rgb function: https://github.com/d3/d3-color#rgb
        """
        return FunctionExpression("rgb", args)

    @classmethod
    def hsl(cls, *args: Any) -> Expression:
        """
        Constructs a new `HSL`_ color.

        If ``h``, ``s`` and ``l`` are specified, these represent the channel values of the returned color; an ``opacity`` may also be specified. If a CSS Color Module Level 3 *specifier* string is specified, it is parsed and then converted to the HSL color space. Uses `d3-color's hsl function`_.

        .. _HSL: https://en.wikipedia.org/wiki/HSL_and_HSV
        .. _d3-color's hsl function: https://github.com/d3/d3-color#hsl
        """
        return FunctionExpression("hsl", args)

    @classmethod
    def lab(cls, *args: Any) -> Expression:
        """
        Constructs a new `CIE LAB`_ color.

        If ``l``, ``a`` and ``b`` are specified, these represent the channel values of the returned color; an ``opacity`` may also be specified. If a CSS Color Module Level 3 *specifier* string is specified, it is parsed and then converted to the LAB color space. Uses `d3-color's lab function`_.

        .. _CIE LAB: https://en.wikipedia.org/wiki/Lab_color_space#CIELAB
        .. _d3-color's lab function: https://github.com/d3/d3-color#lab
        """
        return FunctionExpression("lab", args)

    @classmethod
    def hcl(cls, *args: Any) -> Expression:
        """
        Constructs a new `HCL`_ (hue, chroma, luminance) color.

        If ``h``, ``c`` and ``l`` are specified, these represent the channel values of the returned color; an ``opacity`` may also be specified. If a CSS Color Module Level 3 *specifier* string is specified, it is parsed and then converted to the HCL color space. Uses `d3-color's hcl function`_.

        .. _HCL: https://en.wikipedia.org/wiki/Lab_color_space#CIELAB
        .. _d3-color's hcl function: https://github.com/d3/d3-color#hcl
        """
        return FunctionExpression("hcl", args)

    @classmethod
    def luminance(cls, specifier: IntoExpression, /) -> Expression:
        """
        Returns the luminance for the given color ``specifier`` (compatible with `d3-color's rgb function`_).

        The luminance is calculated according to the `W3C Web Content Accessibility Guidelines`_.

        .. _d3-color's rgb function: https://github.com/d3/d3-color#rgb
        .. _W3C Web Content Accessibility Guidelines: https://www.w3.org/TR/2008/REC-WCAG20-20081211/#relativeluminancedef
        """
        return FunctionExpression("luminance", (specifier,))

    @classmethod
    def contrast(
        cls, specifier1: IntoExpression, specifier2: IntoExpression, /
    ) -> Expression:
        """
        Returns the contrast ratio between the input color specifiers as a float between 1 and 21.

        The contrast is calculated according to the `W3C Web Content Accessibility Guidelines`_.

        .. _W3C Web Content Accessibility Guidelines: https://www.w3.org/TR/2008/REC-WCAG20-20081211/#contrast-ratiodef
        """
        return FunctionExpression("contrast", (specifier1, specifier2))

    @classmethod
    def item(cls) -> Expression:
        """Returns the current scenegraph item that is the target of the event."""
        return FunctionExpression("item", ())

    @classmethod
    def group(cls, name: IntoExpression = None, /) -> Expression:
        """
        Returns the scenegraph group mark item in which the current event has occurred.

        If no arguments are provided, the immediate parent group is returned. If a group name is provided, the matching ancestor group item is returned.
        """
        return FunctionExpression("group", (name,))

    @classmethod
    def xy(cls, item: IntoExpression = None, /) -> Expression:
        """
        Returns the x- and y-coordinates for the current event as a two-element array.

        If no arguments are provided, the top-level coordinate space of the view is used. If a scenegraph ``item`` (or string group name) is provided, the coordinate space of the group item is used.
        """
        return FunctionExpression("xy", (item,))

    @classmethod
    def x(cls, item: IntoExpression = None, /) -> Expression:
        """
        Returns the x coordinate for the current event.

        If no arguments are provided, the top-level coordinate space of the view is used. If a scenegraph ``item`` (or string group name) is provided, the coordinate space of the group item is used.
        """
        return FunctionExpression("x", (item,))

    @classmethod
    def y(cls, item: IntoExpression = None, /) -> Expression:
        """
        Returns the y coordinate for the current event.

        If no arguments are provided, the top-level coordinate space of the view is used. If a scenegraph ``item`` (or string group name) is provided, the coordinate space of the group item is used.
        """
        return FunctionExpression("y", (item,))

    @classmethod
    def pinchDistance(cls, event: IntoExpression, /) -> Expression:
        """Returns the pixel distance between the first two touch points of a multi-touch event."""
        return FunctionExpression("pinchDistance", (event,))

    @classmethod
    def pinchAngle(cls, event: IntoExpression, /) -> Expression:
        """Returns the angle of the line connecting the first two touch points of a multi-touch event."""
        return FunctionExpression("pinchAngle", (event,))

    @classmethod
    def inScope(cls, item: IntoExpression, /) -> Expression:
        """Returns true if the given scenegraph ``item`` is a descendant of the group mark in which the event handler was defined, false otherwise."""
        return FunctionExpression("inScope", (item,))

    @classmethod
    def data(cls, name: IntoExpression, /) -> Expression:
        """
        Returns the array of data objects for the Vega data set with the given ``name``.

        If the data set is not found, returns an empty array.
        """
        return FunctionExpression("data", (name,))

    @classmethod
    def indata(
        cls, name: IntoExpression, field: IntoExpression, value: IntoExpression, /
    ) -> Expression:
        """
        Tests if the data set with a given ``name`` contains a datum with a ``field`` value that matches the input ``value``.
For example: ``alt.expr.indata('table', 'category', value)``.
        """
        return FunctionExpression("indata", (name, field, value))

    @classmethod
    def scale(
        cls,
        name: IntoExpression,
        value: IntoExpression,
        group: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Applies the named scale transform (or projection) to the specified ``value``.

        The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale or projection.
        """
        return FunctionExpression("scale", (name, value, group))

    @classmethod
    def invert(
        cls,
        name: IntoExpression,
        value: IntoExpression,
        group: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Inverts the named scale transform (or projection) for the specified ``value``.

        The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale or projection.
        """
        return FunctionExpression("invert", (name, value, group))

    @classmethod
    def copy(cls, name: IntoExpression, group: IntoExpression = None, /) -> Expression:  # type: ignore[override]
        """
        Returns a copy (a new cloned instance) of the named scale transform or projection, or ``undefined`` if no scale or projection is found.

        The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale or projection.
        """
        return FunctionExpression("copy", (name, group))

    @classmethod
    def domain(
        cls, name: IntoExpression, group: IntoExpression = None, /
    ) -> Expression:
        """
        Returns the scale domain array for the named scale transform, or an empty array if the scale is not found.

        The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale.
        """
        return FunctionExpression("domain", (name, group))

    @classmethod
    def range(cls, name: IntoExpression, group: IntoExpression = None, /) -> Expression:
        """
        Returns the scale range array for the named scale transform, or an empty array if the scale is not found.

        The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale.
        """
        return FunctionExpression("range", (name, group))

    @classmethod
    def bandwidth(
        cls, name: IntoExpression, group: IntoExpression = None, /
    ) -> Expression:
        """
        Returns the current band width for the named band scale transform, or zero if the scale is not found or is not a band scale.

        The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale.
        """
        return FunctionExpression("bandwidth", (name, group))

    @classmethod
    def bandspace(
        cls,
        count: IntoExpression,
        paddingInner: IntoExpression = None,
        paddingOuter: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Returns the number of steps needed within a band scale, based on the ``count`` of domain elements and the inner and outer padding values.

        While normally calculated within the scale itself, this function can be helpful for determining the size of a chart's layout.
        """
        return FunctionExpression("bandspace", (count, paddingInner, paddingOuter))

    @classmethod
    def gradient(
        cls,
        scale: IntoExpression,
        p0: IntoExpression,
        p1: IntoExpression,
        count: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Returns a linear color gradient for the ``scale`` (whose range must be a `continuous color scheme`_) and starting and ending points ``p0`` and ``p1``, each an *[x, y]* array.

        The points ``p0`` and ``p1`` should be expressed in normalized coordinates in the domain [0, 1], relative to the bounds of the item being colored. If unspecified, ``p0`` defaults to ``[0, 0]`` and ``p1`` defaults to ``[1, 0]``, for a horizontal gradient that spans the full bounds of an item.
The optional ``count`` argument indicates a desired target number of sample points to take from the color scale.

        .. _continuous color scheme: https://vega.github.io/vega/docs/schemes
        """
        # NOTE(review): the docstring describes defaults for ``p0``/``p1``, but both are
        # required positional parameters in this signature -- confirm which is intended.
        return FunctionExpression("gradient", (scale, p0, p1, count))

    @classmethod
    def panLinear(cls, domain: IntoExpression, delta: IntoExpression, /) -> Expression:
        """
        Given a linear scale ``domain`` array with numeric or datetime values, returns a new two-element domain array that is the result of panning the domain by a fractional ``delta``.

        The ``delta`` value represents fractional units of the scale range; for example, ``0.5`` indicates panning the scale domain to the right by half the scale range.
        """
        return FunctionExpression("panLinear", (domain, delta))

    @classmethod
    def panLog(cls, domain: IntoExpression, delta: IntoExpression, /) -> Expression:
        """
        Given a log scale ``domain`` array with numeric or datetime values, returns a new two-element domain array that is the result of panning the domain by a fractional ``delta``.

        The ``delta`` value represents fractional units of the scale range; for example, ``0.5`` indicates panning the scale domain to the right by half the scale range.
        """
        return FunctionExpression("panLog", (domain, delta))

    @classmethod
    def panPow(
        cls, domain: IntoExpression, delta: IntoExpression, exponent: IntoExpression, /
    ) -> Expression:
        """
        Given a power scale ``domain`` array with numeric or datetime values and the given ``exponent``, returns a new two-element domain array that is the result of panning the domain by a fractional ``delta``.

        The ``delta`` value represents fractional units of the scale range; for example, ``0.5`` indicates panning the scale domain to the right by half the scale range.
        """
        return FunctionExpression("panPow", (domain, delta, exponent))

    @classmethod
    def panSymlog(
        cls, domain: IntoExpression, delta: IntoExpression, constant: IntoExpression, /
    ) -> Expression:
        """
        Given a symmetric log scale ``domain`` array with numeric or datetime values parameterized by the given ``constant``, returns a new two-element domain array that is the result of panning the domain by a fractional ``delta``.

        The ``delta`` value represents fractional units of the scale range; for example, ``0.5`` indicates panning the scale domain to the right by half the scale range.
        """
        return FunctionExpression("panSymlog", (domain, delta, constant))

    @classmethod
    def zoomLinear(
        cls,
        domain: IntoExpression,
        anchor: IntoExpression,
        scaleFactor: IntoExpression,
        /,
    ) -> Expression:
        """
        Given a linear scale ``domain`` array with numeric or datetime values, returns a new two-element domain array that is the result of zooming the domain by a ``scaleFactor``, centered at the provided fractional ``anchor``.

        The ``anchor`` value represents the zoom position in terms of fractional units of the scale range; for example, ``0.5`` indicates a zoom centered on the mid-point of the scale range.
        """
        return FunctionExpression("zoomLinear", (domain, anchor, scaleFactor))

    @classmethod
    def zoomLog(
        cls,
        domain: IntoExpression,
        anchor: IntoExpression,
        scaleFactor: IntoExpression,
        /,
    ) -> Expression:
        """
        Given a log scale ``domain`` array with numeric or datetime values, returns a new two-element domain array that is the result of zooming the domain by a ``scaleFactor``, centered at the provided fractional ``anchor``.

        The ``anchor`` value represents the zoom position in terms of fractional units of the scale range; for example, ``0.5`` indicates a zoom centered on the mid-point of the scale range.
        """
        return FunctionExpression("zoomLog", (domain, anchor, scaleFactor))

    @classmethod
    def zoomPow(
        cls,
        domain: IntoExpression,
        anchor: IntoExpression,
        scaleFactor: IntoExpression,
        exponent: IntoExpression,
        /,
    ) -> Expression:
        """
        Given a power scale ``domain`` array with numeric or datetime values and the given ``exponent``, returns a new two-element domain array that is the result of zooming the domain by a ``scaleFactor``, centered at the provided fractional ``anchor``.

        The ``anchor`` value represents the zoom position in terms of fractional units of the scale range; for example, ``0.5`` indicates a zoom centered on the mid-point of the scale range.
        """
        return FunctionExpression("zoomPow", (domain, anchor, scaleFactor, exponent))

    @classmethod
    def zoomSymlog(
        cls,
        domain: IntoExpression,
        anchor: IntoExpression,
        scaleFactor: IntoExpression,
        constant: IntoExpression,
        /,
    ) -> Expression:
        """
        Given a symmetric log scale ``domain`` array with numeric or datetime values parameterized by the given ``constant``, returns a new two-element domain array that is the result of zooming the domain by a ``scaleFactor``, centered at the provided fractional ``anchor``.

        The ``anchor`` value represents the zoom position in terms of fractional units of the scale range; for example, ``0.5`` indicates a zoom centered on the mid-point of the scale range.
        """
        return FunctionExpression("zoomSymlog", (domain, anchor, scaleFactor, constant))

    @classmethod
    def geoArea(
        cls,
        projection: IntoExpression,
        feature: IntoExpression,
        group: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Returns the projected planar area (typically in square pixels) of a GeoJSON ``feature`` according to the named ``projection``.

        If the ``projection`` argument is ``null``, computes the spherical area in steradians using unprojected longitude, latitude coordinates. The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the projection.
Uses d3-geo's `geoArea`_ and `path.area`_ methods.

        .. _geoArea: https://github.com/d3/d3-geo#geoArea
        .. _path.area: https://github.com/d3/d3-geo#path_area
        """
        return FunctionExpression("geoArea", (projection, feature, group))

    @classmethod
    def geoBounds(
        cls,
        projection: IntoExpression,
        feature: IntoExpression,
        group: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Returns the projected planar bounding box (typically in pixels) for the specified GeoJSON ``feature``, according to the named ``projection``.

        The bounding box is represented by a two-dimensional array: [[*x₀*, *y₀*], [*x₁*, *y₁*]], where *x₀* is the minimum x-coordinate, *y₀* is the minimum y-coordinate, *x₁* is the maximum x-coordinate, and *y₁* is the maximum y-coordinate. If the ``projection`` argument is ``null``, computes the spherical bounding box using unprojected longitude, latitude coordinates. The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the projection. Uses d3-geo's `geoBounds`_ and `path.bounds`_ methods.

        .. _geoBounds: https://github.com/d3/d3-geo#geoBounds
        .. _path.bounds: https://github.com/d3/d3-geo#path_bounds
        """
        return FunctionExpression("geoBounds", (projection, feature, group))

    @classmethod
    def geoCentroid(
        cls,
        projection: IntoExpression,
        feature: IntoExpression,
        group: IntoExpression = None,
        /,
    ) -> Expression:
        """
        Returns the projected planar centroid (typically in pixels) for the specified GeoJSON ``feature``, according to the named ``projection``.

        If the ``projection`` argument is ``null``, computes the spherical centroid using unprojected longitude, latitude coordinates. The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the projection. Uses d3-geo's `geoCentroid`_ and `path.centroid`_ methods.

        .. _geoCentroid: https://github.com/d3/d3-geo#geoCentroid
        .. _path.centroid: https://github.com/d3/d3-geo#path_centroid
        """
        return FunctionExpression("geoCentroid", (projection, feature, group))

    @classmethod
    def geoScale(
        cls, projection: IntoExpression, group: IntoExpression = None, /
    ) -> Expression:
        """
        Returns the scale value for the named ``projection``.

        The optional ``group`` argument takes a scenegraph group mark item to indicate the specific scope in which to look up the projection.
        """
        return FunctionExpression("geoScale", (projection, group))

    @classmethod
    def treePath(
        cls, name: IntoExpression, source: IntoExpression, target: IntoExpression, /
    ) -> Expression:
        """
        For the hierarchy data set with the given ``name``, returns the shortest path from the ``source`` node id to the ``target`` node id.

        The path starts at the ``source`` node, ascends to the least common ancestor of the ``source`` node and the ``target`` node, and then descends to the ``target`` node.
        """
        return FunctionExpression("treePath", (name, source, target))

    @classmethod
    def treeAncestors(cls, name: IntoExpression, node: IntoExpression, /) -> Expression:
        """For the hierarchy data set with the given ``name``, returns the array of ancestor nodes, starting with the input ``node``, then followed by each parent up to the root."""
        return FunctionExpression("treeAncestors", (name, node))

    @classmethod
    def containerSize(cls) -> Expression:
        """
        Returns the current CSS box size (``[el.clientWidth, el.clientHeight]``) of the parent DOM element that contains the Vega view.

        If there is no container element, returns ``[undefined, undefined]``.
        """
        return FunctionExpression("containerSize", ())

    @classmethod
    def screen(cls) -> Expression:
        """
        Returns the `window.screen`_ object, or ``{}`` if Vega is not running in a browser environment.

        .. _window.screen: https://developer.mozilla.org/en-US/docs/Web/API/Window/screen
        """
        return FunctionExpression("screen", ())

    @classmethod
    def windowSize(cls) -> Expression:
        """Returns the current window size (``[window.innerWidth, window.innerHeight]``) or ``[undefined, undefined]`` if Vega is not running in a browser environment."""
        return FunctionExpression("windowSize", ())

    @classmethod
    def warn(
        cls, value1: IntoExpression, value2: IntoExpression = None, *args: Any
    ) -> Expression:
        """
        Logs a warning message and returns the last argument.

        For the message to appear in the console, the visualization view must have the appropriate logging level set.
        """
        return FunctionExpression("warn", (value1, value2, *args))

    @classmethod
    def info(
        cls, value1: IntoExpression, value2: IntoExpression = None, *args: Any
    ) -> Expression:
        """
        Logs an informative message and returns the last argument.

        For the message to appear in the console, the visualization view must have the appropriate logging level set.
        """
        return FunctionExpression("info", (value1, value2, *args))

    @classmethod
    def debug(
        cls, value1: IntoExpression, value2: IntoExpression = None, *args: Any
    ) -> Expression:
        """
        Logs a debugging message and returns the last argument.

        For the message to appear in the console, the visualization view must have the appropriate logging level set.
        """
        return FunctionExpression("debug", (value1, value2, *args))


_ExprType = expr
# NOTE: Compatibility alias for previous type of `alt.expr`.
# `_ExprType` was not referenced in any internal imports/tests.
================================================ FILE: altair/expr/consts.py ================================================ from __future__ import annotations CONST_LISTING = { "NaN": "not a number (same as JavaScript literal NaN)", "LN10": "the natural log of 10 (alias to Math.LN10)", "E": "the transcendental number e (alias to Math.E)", "LOG10E": "the base 10 logarithm e (alias to Math.LOG10E)", "LOG2E": "the base 2 logarithm of e (alias to Math.LOG2E)", "SQRT1_2": "the square root of 0.5 (alias to Math.SQRT1_2)", "LN2": "the natural log of 2 (alias to Math.LN2)", "SQRT2": "the square root of 2 (alias to Math.SQRT1_2)", "PI": "the transcendental number pi (alias to Math.PI)", } ================================================ FILE: altair/expr/core.py ================================================ from __future__ import annotations import datetime as dt import sys from typing import TYPE_CHECKING, Any, Literal, Union from altair.utils import SchemaBase if TYPE_CHECKING: from typing import TypeAlias from altair.vegalite.v6.schema._typing import Map, PrimitiveValue_T class DatumType: """An object to assist in building Vega-Lite Expressions.""" def __repr__(self) -> str: return "datum" def __getattr__(self, attr) -> GetAttrExpression: if attr.startswith("__") and attr.endswith("__"): raise AttributeError(attr) return GetAttrExpression("datum", attr) def __getitem__(self, attr) -> GetItemExpression: return GetItemExpression("datum", attr) def __call__(self, datum, **kwargs) -> dict[str, Any]: """Specify a datum for use in an encoding.""" return dict(datum=datum, **kwargs) datum = DatumType() def _js_repr(val) -> str: """Return a javascript-safe string representation of val.""" if val is True: return "true" elif val is False: return "false" elif val is None: return "null" elif isinstance(val, OperatorMixin): return val._to_expr() elif isinstance(val, dt.date): return _from_date_datetime(val) elif _is_numpy_generic(val): return repr(val.item()) else: return 
repr(val) def _from_date_datetime(obj: dt.date | dt.datetime, /) -> str: """ Parse native `datetime.(date|datetime)` into a `datetime expression`_ string. **Month is 0-based** .. _datetime expression: https://vega.github.io/vega/docs/expressions/#datetime """ fn_name: Literal["datetime", "utc"] = "datetime" args: tuple[int, ...] = obj.year, obj.month - 1, obj.day if isinstance(obj, dt.datetime): if tzinfo := obj.tzinfo: if tzinfo is dt.timezone.utc: fn_name = "utc" else: msg = ( f"Unsupported timezone {tzinfo!r}.\n" "Only `'UTC'` or naive (local) datetimes are permitted.\n" "See https://altair-viz.github.io/user_guide/generated/core/altair.DateTime.html" ) raise TypeError(msg) us = obj.microsecond ms = us if us == 0 else us // 1_000 args = *args, obj.hour, obj.minute, obj.second, ms return FunctionExpression(fn_name, args)._to_expr() def _is_numpy_generic(obj: Any) -> bool: """ Check if an object is a numpy generic (scalar) type. This function can be used without importing numpy when it is not available. 
# Designed to work with Expression and VariableParameter
class OperatorMixin:
    """
    Mixin that overloads Python operators to build expression nodes.

    Every operator wraps its operands in a ``BinaryExpression``,
    ``UnaryExpression`` or ``FunctionExpression`` and passes that node through
    ``_from_expr`` before returning it.
    """

    def _to_expr(self) -> str:
        """Return the expression string for this object; here, ``repr(self)``."""
        return repr(self)

    def _from_expr(self, expr) -> Any:
        """Return the result an operator should hand back; here, ``expr`` unchanged."""
        return expr

    # --- arithmetic: forward and reflected forms ---------------------------
    def __add__(self, other):
        return self._from_expr(BinaryExpression("+", self, other))

    def __radd__(self, other):
        return self._from_expr(BinaryExpression("+", other, self))

    def __sub__(self, other):
        return self._from_expr(BinaryExpression("-", self, other))

    def __rsub__(self, other):
        return self._from_expr(BinaryExpression("-", other, self))

    def __mul__(self, other):
        return self._from_expr(BinaryExpression("*", self, other))

    def __rmul__(self, other):
        return self._from_expr(BinaryExpression("*", other, self))

    def __truediv__(self, other):
        return self._from_expr(BinaryExpression("/", self, other))

    def __rtruediv__(self, other):
        return self._from_expr(BinaryExpression("/", other, self))

    # Aliases under the legacy division method names.
    __div__ = __truediv__

    __rdiv__ = __rtruediv__

    def __mod__(self, other):
        return self._from_expr(BinaryExpression("%", self, other))

    def __rmod__(self, other):
        return self._from_expr(BinaryExpression("%", other, self))

    def __pow__(self, other):
        # "**" Javascript operator is not supported in all browsers,
        # so exponentiation is emitted as a ``pow(...)`` call instead.
        return self._from_expr(FunctionExpression("pow", (self, other)))

    def __rpow__(self, other):
        # "**" Javascript operator is not supported in all browsers,
        # so exponentiation is emitted as a ``pow(...)`` call instead.
        return self._from_expr(FunctionExpression("pow", (other, self)))

    # --- unary arithmetic --------------------------------------------------
    def __neg__(self):
        return self._from_expr(UnaryExpression("-", self))

    def __pos__(self):
        return self._from_expr(UnaryExpression("+", self))

    def __abs__(self):
        return self._from_expr(FunctionExpression("abs", (self,)))

    # --- comparison operators (emitted with strict ``===`` / ``!==``) -------
    def __eq__(self, other):
        return self._from_expr(BinaryExpression("===", self, other))

    def __ne__(self, other):
        return self._from_expr(BinaryExpression("!==", self, other))

    def __gt__(self, other):
        return self._from_expr(BinaryExpression(">", self, other))

    def __lt__(self, other):
        return self._from_expr(BinaryExpression("<", self, other))

    def __ge__(self, other):
        return self._from_expr(BinaryExpression(">=", self, other))

    def __le__(self, other):
        return self._from_expr(BinaryExpression("<=", self, other))

    # --- logical operators: ``&``, ``|`` and ``~`` emit ``&&``, ``||``, ``!`` --
    def __and__(self, other):
        return self._from_expr(BinaryExpression("&&", self, other))

    def __rand__(self, other):
        return self._from_expr(BinaryExpression("&&", other, self))

    def __or__(self, other):
        return self._from_expr(BinaryExpression("||", self, other))

    def __ror__(self, other):
        return self._from_expr(BinaryExpression("||", other, self))

    def __invert__(self):
        return self._from_expr(UnaryExpression("!", self))
class GetItemExpression(Expression):
    """
    Expression node for bracket access, rendered as ``group[name]``.

    Parameters
    ----------
    group
        The object being indexed.  It is interpolated into the output with
        ordinary f-string formatting.
    name
        The key or index placed inside the brackets.
    """

    def __init__(self, group, name) -> None:
        super().__init__(group=group, name=name)

    def __repr__(self) -> str:
        # Render the key with ``_js_repr``, as UnaryExpression,
        # BinaryExpression and FunctionExpression do for their operands.
        # Python ``repr`` would emit ``True``/``False``/``None``, numpy scalar
        # reprs, or the Python repr of an ``OperatorMixin`` object, none of
        # which is the JavaScript form.  String and plain-int keys render
        # exactly as before.
        return f"{self.group}[{_js_repr(self.name)}]"
This method will return false for timestamp numbers or date-formatted strings; it recognizes Date objects only.", "isDefined": r"Returns true if _value_ is a defined value, false if _value_ equals `undefined`. This method will return true for `null` and `NaN` values.", "isNumber": r"Returns true if _value_ is a number, false otherwise. `NaN` and `Infinity` are considered numbers.", "isObject": r"Returns true if _value_ is an object (including arrays and Dates), false otherwise.", "isRegExp": r"Returns true if _value_ is a RegExp (regular expression) object, false otherwise.", "isString": r"Returns true if _value_ is a string, false otherwise.", "isValid": r"Returns true if _value_ is not `null`, `undefined`, or `NaN`, false otherwise.", "toBoolean": r"Coerces the input _value_ to a string. Null values and empty strings are mapped to `null`.", "toDate": r"Coerces the input _value_ to a Date instance. Null values and empty strings are mapped to `null`. If an optional _parser_ function is provided, it is used to perform date parsing, otherwise `Date.parse` is used. Be aware that `Date.parse` has different implementations across browsers!", "toNumber": r"Coerces the input _value_ to a number. Null values and empty strings are mapped to `null`.", "toString": r"Coerces the input _value_ to a string. Null values and empty strings are mapped to `null`.", "if": r"If _test_ is truthy, returns _thenValue_. Otherwise, returns _elseValue_. The _if_ function is equivalent to the ternary operator `a ? b : c`.", "isNaN": r"Returns true if _value_ is not a number. Same as JavaScript's `isNaN`.", "isFinite": r"Returns true if _value_ is a finite number. Same as JavaScript's `isFinite`.", "abs": r"Returns the absolute value of _value_. Same as JavaScript's `Math.abs`.", "acos": r"Trigonometric arccosine. Same as JavaScript's `Math.acos`.", "asin": r"Trigonometric arcsine. Same as JavaScript's `Math.asin`.", "atan": r"Trigonometric arctangent. 
Same as JavaScript's `Math.atan`.", "atan2": r"Returns the arctangent of _dy / dx_. Same as JavaScript's `Math.atan2`.", "ceil": r"Rounds _value_ to the nearest integer of equal or greater value. Same as JavaScript's `Math.ceil`.", "clamp": r"Restricts _value_ to be between the specified _min_ and _max_.", "cos": r"Trigonometric cosine. Same as JavaScript's `Math.cos`.", "exp": r"Returns the value of _e_ raised to the provided _exponent_. Same as JavaScript's `Math.exp`.", "floor": r"Rounds _value_ to the nearest integer of equal or lower value. Same as JavaScript's `Math.floor`.", "hypot": r"Returns the square root of the sum of squares of its arguments. Same as JavaScript's `Math.hypot`.", "log": r"Returns the natural logarithm of _value_. Same as JavaScript's `Math.log`.", "max": r"Returns the maximum argument value. Same as JavaScript's `Math.max`.", "min": r"Returns the minimum argument value. Same as JavaScript's `Math.min`.", "pow": r"Returns _value_ raised to the given _exponent_. Same as JavaScript's `Math.pow`.", "random": r"Returns a pseudo-random number in the range [0,1). Same as JavaScript's `Math.random`.", "round": r"Rounds _value_ to the nearest integer. Same as JavaScript's `Math.round`.", "sin": r"Trigonometric sine. Same as JavaScript's `Math.sin`.", "sqrt": r"Square root function. Same as JavaScript's `Math.sqrt`.", "tan": r"Trigonometric tangent. Same as JavaScript's `Math.tan`.", "sampleNormal": r"Returns a sample from a univariate [normal (Gaussian) probability distribution](https://en.wikipedia.org/wiki/Normal_distribution) with specified _mean_ and standard deviation _stdev_. If unspecified, the mean defaults to `0` and the standard deviation defaults to `1`.", "cumulativeNormal": r"Returns the value of the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function) at the given input domain _value_ for a normal distribution with specified _mean_ and standard deviation _stdev_. 
If unspecified, the mean defaults to `0` and the standard deviation defaults to `1`.", "densityNormal": r"Returns the value of the [probability density function](https://en.wikipedia.org/wiki/Probability_density_function) at the given input domain _value_, for a normal distribution with specified _mean_ and standard deviation _stdev_. If unspecified, the mean defaults to `0` and the standard deviation defaults to `1`.", "quantileNormal": r"Returns the quantile value (the inverse of the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function)) for the given input _probability_, for a normal distribution with specified _mean_ and standard deviation _stdev_. If unspecified, the mean defaults to `0` and the standard deviation defaults to `1`.", "sampleLogNormal": r"Returns a sample from a univariate [log-normal probability distribution](https://en.wikipedia.org/wiki/Log-normal_distribution) with specified log _mean_ and log standard deviation _stdev_. If unspecified, the log mean defaults to `0` and the log standard deviation defaults to `1`.", "cumulativeLogNormal": r"Returns the value of the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function) at the given input domain _value_ for a log-normal distribution with specified log _mean_ and log standard deviation _stdev_. If unspecified, the log mean defaults to `0` and the log standard deviation defaults to `1`.", "densityLogNormal": r"Returns the value of the [probability density function](https://en.wikipedia.org/wiki/Probability_density_function) at the given input domain _value_, for a log-normal distribution with specified log _mean_ and log standard deviation _stdev_. 
If unspecified, the log mean defaults to `0` and the log standard deviation defaults to `1`.", "quantileLogNormal": r"Returns the quantile value (the inverse of the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function)) for the given input _probability_, for a log-normal distribution with specified log _mean_ and log standard deviation _stdev_. If unspecified, the log mean defaults to `0` and the log standard deviation defaults to `1`.", "sampleUniform": r"Returns a sample from a univariate [continuous uniform probability distribution](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)) over the interval [_min_, _max_). If unspecified, _min_ defaults to `0` and _max_ defaults to `1`. If only one argument is provided, it is interpreted as the _max_ value.", "cumulativeUniform": r"Returns the value of the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function) at the given input domain _value_ for a uniform distribution over the interval [_min_, _max_). If unspecified, _min_ defaults to `0` and _max_ defaults to `1`. If only one argument is provided, it is interpreted as the _max_ value.", "densityUniform": r"Returns the value of the [probability density function](https://en.wikipedia.org/wiki/Probability_density_function) at the given input domain _value_, for a uniform distribution over the interval [_min_, _max_). If unspecified, _min_ defaults to `0` and _max_ defaults to `1`. If only one argument is provided, it is interpreted as the _max_ value.", "quantileUniform": r"Returns the quantile value (the inverse of the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function)) for the given input _probability_, for a uniform distribution over the interval [_min_, _max_). If unspecified, _min_ defaults to `0` and _max_ defaults to `1`. 
If only one argument is provided, it is interpreted as the _max_ value.", "now": r"Returns the timestamp for the current time.", "datetime": r"Returns a new `Date` instance. The _month_ is 0-based, such that `1` represents February.", "date": r"Returns the day of the month for the given _datetime_ value, in local time.", "day": r"Returns the day of the week for the given _datetime_ value, in local time.", "dayofyear": r"Returns the one-based day of the year for the given _datetime_ value, in local time.", "year": r"Returns the year for the given _datetime_ value, in local time.", "quarter": r"Returns the quarter of the year (0-3) for the given _datetime_ value, in local time.", "month": r"Returns the (zero-based) month for the given _datetime_ value, in local time.", "week": r"Returns the week number of the year for the given _datetime_, in local time. This function assumes Sunday-based weeks. Days before the first Sunday of the year are considered to be in week 0, the first Sunday of the year is the start of week 1, the second Sunday week 2, _etc._.", "hours": r"Returns the hours component for the given _datetime_ value, in local time.", "minutes": r"Returns the minutes component for the given _datetime_ value, in local time.", "seconds": r"Returns the seconds component for the given _datetime_ value, in local time.", "milliseconds": r"Returns the milliseconds component for the given _datetime_ value, in local time.", "time": r"Returns the epoch-based timestamp for the given _datetime_ value.", "timezoneoffset": r"Returns the timezone offset from the local timezone to UTC for the given _datetime_ value.", "timeOffset": r"Returns a new `Date` instance that offsets the given _date_ by the specified time [_unit_](../api/time/#time-units) in the local timezone. 
The optional _step_ argument indicates the number of time unit steps to offset by (default 1).", "timeSequence": r"Returns an array of `Date` instances from _start_ (inclusive) to _stop_ (exclusive), with each entry separated by the given time [_unit_](../api/time/#time-units) in the local timezone. The optional _step_ argument indicates the number of time unit steps to take between each sequence entry (default 1).", "utc": r"Returns a timestamp for the given UTC date. The _month_ is 0-based, such that `1` represents February.", "utcdate": r"Returns the day of the month for the given _datetime_ value, in UTC time.", "utcday": r"Returns the day of the week for the given _datetime_ value, in UTC time.", "utcdayofyear": r"Returns the one-based day of the year for the given _datetime_ value, in UTC time.", "utcyear": r"Returns the year for the given _datetime_ value, in UTC time.", "utcquarter": r"Returns the quarter of the year (0-3) for the given _datetime_ value, in UTC time.", "utcmonth": r"Returns the (zero-based) month for the given _datetime_ value, in UTC time.", "utcweek": r"Returns the week number of the year for the given _datetime_, in UTC time. This function assumes Sunday-based weeks. Days before the first Sunday of the year are considered to be in week 0, the first Sunday of the year is the start of week 1, the second Sunday week 2, _etc._.", "utchours": r"Returns the hours component for the given _datetime_ value, in UTC time.", "utcminutes": r"Returns the minutes component for the given _datetime_ value, in UTC time.", "utcseconds": r"Returns the seconds component for the given _datetime_ value, in UTC time.", "utcmilliseconds": r"Returns the milliseconds component for the given _datetime_ value, in UTC time.", "utcOffset": r"Returns a new `Date` instance that offsets the given _date_ by the specified time [_unit_](../api/time/#time-units) in UTC time. 
The optional _step_ argument indicates the number of time unit steps to offset by (default 1).", "utcSequence": r"Returns an array of `Date` instances from _start_ (inclusive) to _stop_ (exclusive), with each entry separated by the given time [_unit_](../api/time/#time-units) in UTC time. The optional _step_ argument indicates the number of time unit steps to take between each sequence entry (default 1).", "extent": r"Returns a new _[min, max]_ array with the minimum and maximum values of the input array, ignoring `null`, `undefined`, and `NaN` values.", "clampRange": r"Clamps a two-element _range_ array in a span-preserving manner. If the span of the input _range_ is less than _(max - min)_ and an endpoint exceeds either the _min_ or _max_ value, the range is translated such that the span is preserved and one endpoint touches the boundary of the _[min, max]_ range. If the span exceeds _(max - min)_, the range _[min, max]_ is returned.", "indexof": r"Returns the first index of _value_ in the input _array_, or the first index of _substring_ in the input _string_..", "inrange": r"Tests whether _value_ lies within (or is equal to either) the first and last values of the _range_ array.", "join": r"Returns a new string by concatenating all of the elements of the input _array_, separated by commas or a specified _separator_ string.", "lastindexof": r"Returns the last index of _value_ in the input _array_, or the last index of _substring_ in the input _string_..", "length": r"Returns the length of the input _array_, or the length of the input _string_.", "lerp": r"Returns the linearly interpolated value between the first and last entries in the _array_ for the provided interpolation _fraction_ (typically between 0 and 1). For example, `lerp([0, 50], 0.5)` returns 25.", "peek": r"Returns the last element in the input _array_. Similar to the built-in `Array.pop` method, except that it does not remove the last element. 
This method is a convenient shorthand for `array[array.length - 1]`.", "pluck": r"Retrieves the value for the specified *field* from a given *array* of objects. The input *field* string may include nested properties (e.g., `foo.bar.bz`).", "reverse": r"Returns a new array with elements in a reverse order of the input _array_. The first array element becomes the last, and the last array element becomes the first.", "sequence": r"Returns an array containing an arithmetic sequence of numbers. If _step_ is omitted, it defaults to 1. If _start_ is omitted, it defaults to 0. The _stop_ value is exclusive; it is not included in the result. If _step_ is positive, the last element is the largest _start + i * step_ less than _stop_; if _step_ is negative, the last element is the smallest _start + i * step_ greater than _stop_. If the returned array would contain an infinite number of values, an empty range is returned. The arguments are not required to be integers.", "slice": r"Returns a section of _array_ between the _start_ and _end_ indices. If the _end_ argument is negative, it is treated as an offset from the end of the array (_length(array) + end_).", "span": r"Returns the span of _array_: the difference between the last and first elements, or _array[array.length-1] - array[0]_. Or if input is a string: a section of _string_ between the _start_ and _end_ indices. If the _end_ argument is negative, it is treated as an offset from the end of the string (_length(string) + end_)..", "lower": r"Transforms _string_ to lower-case letters.", "pad": r"Pads a _string_ value with repeated instances of a _character_ up to a specified _length_. If _character_ is not specified, a space (' ') is used. By default, padding is added to the end of a string. An optional _align_ parameter specifies if padding should be added to the `'left'` (beginning), `'center'`, or `'right'` (end) of the input string.", "parseFloat": r"Parses the input _string_ to a floating-point value. 
Same as JavaScript's `parseFloat`.", "parseInt": r"Parses the input _string_ to an integer value. Same as JavaScript's `parseInt`.", "replace": r"Returns a new string with some or all matches of _pattern_ replaced by a _replacement_ string. The _pattern_ can be a string or a regular expression. If _pattern_ is a string, only the first instance will be replaced. Same as [JavaScript's String.replace](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace).", "split": r"Returns an array of tokens created by splitting the input _string_ according to a provided _separator_ pattern. The result can optionally be constrained to return at most _limit_ tokens.", "substring": r"Returns a section of _string_ between the _start_ and _end_ indices.", "trim": r"Returns a trimmed string with preceding and trailing whitespace removed.", "truncate": r"Truncates an input _string_ to a target _length_. The optional _align_ argument indicates what part of the string should be truncated: `'left'` (the beginning), `'center'`, or `'right'` (the end). By default, the `'right'` end of the string is truncated. The optional _ellipsis_ argument indicates the string to use to indicate truncated content; by default the ellipsis character `...` (`\\u2026`) is used.", "upper": r"Transforms _string_ to upper-case letters.", "merge": r"Merges the input objects _object1_, _object2_, etc into a new output object. Inputs are visited in sequential order, such that key values from later arguments can overwrite those from earlier arguments. Example: `merge({a:1, b:2}, {a:3}) -> {a:3, b:2}`.", "dayFormat": r"Formats a (0-6) _weekday_ number as a full week day name, according to the current locale. For example: `dayFormat(0) -> \"Sunday\"`.", "dayAbbrevFormat": r"Formats a (0-6) _weekday_ number as an abbreviated week day name, according to the current locale. For example: `dayAbbrevFormat(0) -> \"Sun\"`.", "format": r"Formats a numeric _value_ as a string. 
The _specifier_ must be a valid [d3-format specifier](https://github.com/d3/d3-format/) (e.g., `format(value, ',.2f')`.", "monthFormat": r"Formats a (zero-based) _month_ number as a full month name, according to the current locale. For example: `monthFormat(0) -> \"January\"`.", "monthAbbrevFormat": r"Formats a (zero-based) _month_ number as an abbreviated month name, according to the current locale. For example: `monthAbbrevFormat(0) -> \"Jan\"`.", "timeUnitSpecifier": r"Returns a time format specifier string for the given time [_units_](../api/time/#time-units). The optional _specifiers_ object provides a set of specifier sub-strings for customizing the format; for more, see the [timeUnitSpecifier API documentation](../api/time/#timeUnitSpecifier). The resulting specifier string can then be used as input to the [timeFormat](#timeFormat) or [utcFormat](#utcFormat) functions, or as the _format_ parameter of an axis or legend. For example: `timeFormat(date, timeUnitSpecifier('year'))` or `timeFormat(date, timeUnitSpecifier(['hours', 'minutes']))`.", "timeFormat": r"Formats a datetime _value_ (either a `Date` object or timestamp) as a string, according to the local time. The _specifier_ must be a valid [d3-time-format specifier](https://github.com/d3/d3-time-format/). For example: `timeFormat(timestamp, '%A')`.", "timeParse": r"Parses a _string_ value to a Date object, according to the local time. The _specifier_ must be a valid [d3-time-format specifier](https://github.com/d3/d3-time-format/). For example: `timeParse('June 30, 2015', '%B %d, %Y')`.", "utcFormat": r"Formats a datetime _value_ (either a `Date` object or timestamp) as a string, according to [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) time. The _specifier_ must be a valid [d3-time-format specifier](https://github.com/d3/d3-time-format/). 
For example: `utcFormat(timestamp, '%A')`.", "utcParse": r"Parses a _string_ value to a Date object, according to [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) time. The _specifier_ must be a valid [d3-time-format specifier](https://github.com/d3/d3-time-format/). For example: `utcParse('June 30, 2015', '%B %d, %Y')`.", "regexp": r"Creates a regular expression instance from an input _pattern_ string and optional _flags_. Same as [JavaScript's `RegExp`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp).", "test": r"Evaluates a regular expression _regexp_ against the input _string_, returning `true` if the string matches the pattern, `false` otherwise. For example: `test(/\\d{3}/, \"32-21-9483\") -> true`.", "rgb": r"Constructs a new [RGB](https://en.wikipedia.org/wiki/RGB_color_model) color. If _r_, _g_ and _b_ are specified, these represent the channel values of the returned color; an _opacity_ may also be specified. If a CSS Color Module Level 3 _specifier_ string is specified, it is parsed and then converted to the RGB color space. Uses [d3-color's rgb function](https://github.com/d3/d3-color#rgb).", "hsl": r"Constructs a new [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV) color. If _h_, _s_ and _l_ are specified, these represent the channel values of the returned color; an _opacity_ may also be specified. If a CSS Color Module Level 3 _specifier_ string is specified, it is parsed and then converted to the HSL color space. Uses [d3-color's hsl function](https://github.com/d3/d3-color#hsl).", "lab": r"Constructs a new [CIE LAB](https://en.wikipedia.org/wiki/Lab_color_space#CIELAB) color. If _l_, _a_ and _b_ are specified, these represent the channel values of the returned color; an _opacity_ may also be specified. If a CSS Color Module Level 3 _specifier_ string is specified, it is parsed and then converted to the LAB color space. 
Uses [d3-color's lab function](https://github.com/d3/d3-color#lab).", "hcl": r"Constructs a new [HCL](https://en.wikipedia.org/wiki/Lab_color_space#CIELAB) (hue, chroma, luminance) color. If _h_, _c_ and _l_ are specified, these represent the channel values of the returned color; an _opacity_ may also be specified. If a CSS Color Module Level 3 _specifier_ string is specified, it is parsed and then converted to the HCL color space. Uses [d3-color's hcl function](https://github.com/d3/d3-color#hcl).", "luminance": r"Returns the luminance for the given color _specifier_ (compatible with [d3-color's rgb function](https://github.com/d3/d3-color#rgb)). The luminance is calculated according to the [W3C Web Content Accessibility Guidelines](https://www.w3.org/TR/2008/REC-WCAG20-20081211/#relativeluminancedef).", "contrast": r"Returns the contrast ratio between the input color specifiers as a float between 1 and 21. The contrast is calculated according to the [W3C Web Content Accessibility Guidelines](https://www.w3.org/TR/2008/REC-WCAG20-20081211/#contrast-ratiodef).", "item": r"Returns the current scenegraph item that is the target of the event.", "group": r"Returns the scenegraph group mark item in which the current event has occurred. If no arguments are provided, the immediate parent group is returned. If a group name is provided, the matching ancestor group item is returned.", "xy": r"Returns the x- and y-coordinates for the current event as a two-element array. If no arguments are provided, the top-level coordinate space of the view is used. If a scenegraph _item_ (or string group name) is provided, the coordinate space of the group item is used.", "x": r"Returns the x coordinate for the current event. If no arguments are provided, the top-level coordinate space of the view is used. If a scenegraph _item_ (or string group name) is provided, the coordinate space of the group item is used.", "y": r"Returns the y coordinate for the current event. 
If no arguments are provided, the top-level coordinate space of the view is used. If a scenegraph _item_ (or string group name) is provided, the coordinate space of the group item is used.", "pinchDistance": r"Returns the pixel distance between the first two touch points of a multi-touch event.", "pinchAngle": r"Returns the angle of the line connecting the first two touch points of a multi-touch event.", "inScope": r"Returns true if the given scenegraph _item_ is a descendant of the group mark in which the event handler was defined, false otherwise.", "data": r"Returns the array of data objects for the Vega data set with the given _name_. If the data set is not found, returns an empty array.", "indata": r"Tests if the data set with a given _name_ contains a datum with a _field_ value that matches the input _value_. For example: `indata('table', 'category', value)`.", "scale": r"Applies the named scale transform (or projection) to the specified _value_. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale or projection.", "invert": r"Inverts the named scale transform (or projection) for the specified _value_. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale or projection.", "copy": r"Returns a copy (a new cloned instance) of the named scale transform of projection, or `undefined` if no scale or projection is found. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale or projection.", "domain": r"Returns the scale domain array for the named scale transform, or an empty array if the scale is not found. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale.", "range": r"Returns the scale range array for the named scale transform, or an empty array if the scale is not found. 
The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale.", "bandwidth": r"Returns the current band width for the named band scale transform, or zero if the scale is not found or is not a band scale. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the scale.", "bandspace": r"Returns the number of steps needed within a band scale, based on the _count_ of domain elements and the inner and outer padding values. While normally calculated within the scale itself, this function can be helpful for determining the size of a chart's layout.", "gradient": r"Returns a linear color gradient for the _scale_ (whose range must be a [continuous color scheme](../schemes)) and starting and ending points _p0_ and _p1_, each an _[x, y]_ array. The points _p0_ and _p1_ should be expressed in normalized coordinates in the domain [0, 1], relative to the bounds of the item being colored. If unspecified, _p0_ defaults to `[0, 0]` and _p1_ defaults to `[1, 0]`, for a horizontal gradient that spans the full bounds of an item. The optional _count_ argument indicates a desired target number of sample points to take from the color scale.", "panLinear": r"Given a linear scale _domain_ array with numeric or datetime values, returns a new two-element domain array that is the result of panning the domain by a fractional _delta_. The _delta_ value represents fractional units of the scale range; for example, `0.5` indicates panning the scale domain to the right by half the scale range.", "panLog": r"Given a log scale _domain_ array with numeric or datetime values, returns a new two-element domain array that is the result of panning the domain by a fractional _delta_. 
The _delta_ value represents fractional units of the scale range; for example, `0.5` indicates panning the scale domain to the right by half the scale range.", "panPow": r"Given a power scale _domain_ array with numeric or datetime values and the given _exponent_, returns a new two-element domain array that is the result of panning the domain by a fractional _delta_. The _delta_ value represents fractional units of the scale range; for example, `0.5` indicates panning the scale domain to the right by half the scale range.", "panSymlog": r"Given a symmetric log scale _domain_ array with numeric or datetime values parameterized by the given _constant_, returns a new two-element domain array that is the result of panning the domain by a fractional _delta_. The _delta_ value represents fractional units of the scale range; for example, `0.5` indicates panning the scale domain to the right by half the scale range.", "zoomLinear": r"Given a linear scale _domain_ array with numeric or datetime values, returns a new two-element domain array that is the result of zooming the domain by a _scaleFactor_, centered at the provided fractional _anchor_. The _anchor_ value represents the zoom position in terms of fractional units of the scale range; for example, `0.5` indicates a zoom centered on the mid-point of the scale range.", "zoomLog": r"Given a log scale _domain_ array with numeric or datetime values, returns a new two-element domain array that is the result of zooming the domain by a _scaleFactor_, centered at the provided fractional _anchor_. The _anchor_ value represents the zoom position in terms of fractional units of the scale range; for example, `0.5` indicates a zoom centered on the mid-point of the scale range.", "zoomPow": r"Given a power scale _domain_ array with numeric or datetime values and the given _exponent_, returns a new two-element domain array that is the result of zooming the domain by a _scaleFactor_, centered at the provided fractional _anchor_. 
The _anchor_ value represents the zoom position in terms of fractional units of the scale range; for example, `0.5` indicates a zoom centered on the mid-point of the scale range.", "zoomSymlog": r"Given a symmetric log scale _domain_ array with numeric or datetime values parameterized by the given _constant_, returns a new two-element domain array that is the result of zooming the domain by a _scaleFactor_, centered at the provided fractional _anchor_. The _anchor_ value represents the zoom position in terms of fractional units of the scale range; for example, `0.5` indicates a zoom centered on the mid-point of the scale range.", "geoArea": r"Returns the projected planar area (typically in square pixels) of a GeoJSON _feature_ according to the named _projection_. If the _projection_ argument is `null`, computes the spherical area in steradians using unprojected longitude, latitude coordinates. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the projection. Uses d3-geo's [geoArea](https://github.com/d3/d3-geo#geoArea) and [path.area](https://github.com/d3/d3-geo#path_area) methods.", "geoBounds": r"Returns the projected planar bounding box (typically in pixels) for the specified GeoJSON _feature_, according to the named _projection_. The bounding box is represented by a two-dimensional array: [[_x0_, _y0_], [_x1_, _y1_]], where _x0_ is the minimum x-coordinate, _y0_ is the minimum y-coordinate, _x1_ is the maximum x-coordinate, and _y1_ is the maximum y-coordinate. If the _projection_ argument is `null`, computes the spherical bounding box using unprojected longitude, latitude coordinates. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the projection. 
Uses d3-geo's [geoBounds](https://github.com/d3/d3-geo#geoBounds) and [path.bounds](https://github.com/d3/d3-geo#path_bounds) methods.", "geoCentroid": r"Returns the projected planar centroid (typically in pixels) for the specified GeoJSON _feature_, according to the named _projection_. If the _projection_ argument is `null`, computes the spherical centroid using unprojected longitude, latitude coordinates. The optional _group_ argument takes a scenegraph group mark item to indicate the specific scope in which to look up the projection. Uses d3-geo's [geoCentroid](https://github.com/d3/d3-geo#geoCentroid) and [path.centroid](https://github.com/d3/d3-geo#path_centroid) methods.", "treePath": r"For the hierarchy data set with the given _name_, returns the shortest path through from the _source_ node id to the _target_ node id. The path starts at the _source_ node, ascends to the least common ancestor of the _source_ node and the _target_ node, and then descends to the _target_ node.", "treeAncestors": r"For the hierarchy data set with the given _name_, returns the array of ancestors nodes, starting with the input _node_, then followed by each parent up to the root.", "containerSize": r"Returns the current CSS box size (`[el.clientWidth, el.clientHeight]`) of the parent DOM element that contains the Vega view. If there is no container element, returns `[undefined, undefined]`.", "screen": r"Returns the [`window.screen`](https://developer.mozilla.org/en-US/docs/Web/API/Window/screen) object, or `{}` if Vega is not running in a browser environment.", "windowSize": r"Returns the current window size (`[window.innerWidth, window.innerHeight]`) or `[undefined, undefined]` if Vega is not running in a browser environment.", "warn": r"Logs a warning message and returns the last argument. For the message to appear in the console, the visualization view must have the appropriate logging level set.", "info": r"Logs an informative message and returns the last argument. 
try:
    import anywidget  # noqa: F401
except ImportError:
    # anywidget is an optional dependency. Publish a placeholder under the
    # same name so ``JupyterChart`` always exists in the altair namespace;
    # the failure is deferred until somebody tries to construct one.

    class JupyterChart:
        """Placeholder that raises ``ImportError`` as soon as it is constructed."""

        def __init__(self, *args, **kwargs):
            install_hint = "\n".join(
                (
                    "The Altair JupyterChart requires the anywidget ",
                    "Python package which may be installed using pip with",
                    " pip install anywidget",
                    "or using conda with",
                    " conda install -c conda-forge anywidget",
                    "Afterwards, you will need to restart your Python kernel.",
                )
            )
            raise ImportError(install_hint)

else:
    from .jupyter_chart import JupyterChart  # noqa: F401
/**
 * anywidget entry point: embeds the chart described by the model's `spec`
 * into `el` and keeps model and Vega view in sync.
 *
 * The chart is re-embedded whenever `spec`, `embed_options`, `debounce_wait`
 * or `max_wait` change on the model.
 *
 * @param {{model: object, el: HTMLElement}} context anywidget render context.
 */
async function render({ model, el }) {
    // Tear-down callback of the currently embedded chart (null when none).
    let finalize = null;

    // Model-level listeners that close over the *current* view. They are
    // tracked so that a re-embed can remove them; otherwise every re-embed
    // would stack another pair of handlers that keep driving a view that
    // has already been finalized.
    let paramsListener = null;
    let updatesListener = null;

    function detachViewListeners() {
        if (paramsListener != null) {
            model.off('change:_params', paramsListener);
            paramsListener = null;
        }
        if (updatesListener != null) {
            model.off('change:_py_to_js_updates', updatesListener);
            updatesListener = null;
        }
    }

    // Replace the widget content with an error notice. The message is
    // inserted as text (never parsed as HTML) because it can echo arbitrary
    // content from the chart specification.
    function showError(error) {
        // The notice refers the user to the console, so make sure the
        // error actually ends up there.
        console.error(error);

        const container = document.createElement("div");
        container.style.color = "red";

        const summary = document.createElement("p");
        summary.textContent = "JavaScript Error: " + (error?.message ?? String(error));

        const hint = document.createElement("p");
        hint.textContent = (
            "This usually means there's a typo in your chart specification. "
            + "See the javascript console for the full traceback."
        );

        container.append(summary, hint);
        el.replaceChildren(container);
    }

    const reembed = async () => {
        detachViewListeners();
        if (finalize != null) {
            finalize();
            // Avoid finalizing the same view again on the next re-embed.
            finalize = null;
        }

        model.set("local_tz", Intl.DateTimeFormat().resolvedOptions().timeZone);

        let spec = structuredClone(model.get("spec"));
        if (spec == null) {
            // Remove any existing chart and return
            while (el.firstChild) {
                el.removeChild(el.lastChild);
            }
            model.save_changes();
            return;
        }
        let embedOptions = structuredClone(model.get("embed_options")) ?? undefined;

        let api;
        try {
            api = await vegaEmbed(el, spec, embedOptions);
        } catch (error) {
            showError(error);
            return;
        }

        finalize = api.finalize;

        // Debounce config
        const wait = model.get("debounce_wait") ?? 10;
        const debounceOpts = {leading: false, trailing: true};
        if (model.get("max_wait") ?? true) {
            debounceOpts["maxWait"] = wait;
        }

        // Mirror every selection (signal value + backing store) into the model.
        const initialSelections = {};
        for (const selectionName of Object.keys(model.get("_vl_selections") ?? {})) {
            const storeName = `${selectionName}_store`;
            const selectionHandler = (_, value) => {
                const newSelections = cleanJson(model.get("_vl_selections") ?? {});
                const store = cleanJson(api.view.data(storeName) ?? []);

                newSelections[selectionName] = {value, store};
                model.set("_vl_selections", newSelections);
                model.save_changes();
            };
            api.view.addSignalListener(selectionName, lodashDebounce(selectionHandler, wait, debounceOpts));

            initialSelections[selectionName] = {
                value: cleanJson(api.view.signal(selectionName) ?? {}),
                store: cleanJson(api.view.data(storeName) ?? [])
            };
        }
        model.set("_vl_selections", initialSelections);

        // Mirror every variable param into the model.
        const initialParams = {};
        for (const paramName of Object.keys(model.get("_params") ?? {})) {
            const paramHandler = (_, value) => {
                const newParams = cleanJson(model.get("_params") ?? {});
                newParams[paramName] = value;
                model.set("_params", newParams);
                model.save_changes();
            };
            api.view.addSignalListener(paramName, lodashDebounce(paramHandler, wait, debounceOpts));

            initialParams[paramName] = api.view.signal(paramName) ?? null;
        }
        model.set("_params", initialParams);
        model.save_changes();

        // Param change callback
        paramsListener = async (new_params) => {
            for (const [param, value] of Object.entries(new_params.changed ? new_params.changed._params : new_params)) {
                api.view.signal(param, value);
            }
            await api.view.runAsync();
        };
        model.on('change:_params', paramsListener);

        // Add signal/data listeners
        for (const watch of model.get("_js_watch_plan") ?? []) {
            if (watch.namespace === "data") {
                const dataHandler = (_, value) => {
                    model.set("_js_to_py_updates", [{
                        namespace: "data",
                        name: watch.name,
                        scope: watch.scope,
                        value: cleanJson(value)
                    }]);
                    model.save_changes();
                };
                addDataListener(api.view, watch.name, watch.scope, lodashDebounce(dataHandler, wait, debounceOpts));
            } else if (watch.namespace === "signal") {
                const signalHandler = (_, value) => {
                    model.set("_js_to_py_updates", [{
                        namespace: "signal",
                        name: watch.name,
                        scope: watch.scope,
                        value: cleanJson(value)
                    }]);
                    model.save_changes();
                };
                addSignalListener(api.view, watch.name, watch.scope, lodashDebounce(signalHandler, wait, debounceOpts));
            }
        }

        // Add signal/data updaters
        updatesListener = async (updates) => {
            const py_to_js_updates = updates.changed ? updates.changed._py_to_js_updates : updates;
            for (const update of py_to_js_updates ?? []) {
                if (update.namespace === "signal") {
                    setSignalValue(api.view, update.name, update.scope, update.value);
                } else if (update.namespace === "data") {
                    setDataValue(api.view, update.name, update.scope, update.value);
                }
            }
            await api.view.runAsync();
        };
        model.on('change:_py_to_js_updates', updatesListener);
    };

    model.on('change:spec', reembed);
    model.on('change:embed_options', reembed);
    model.on('change:debounce_wait', reembed);
    model.on('change:max_wait', reembed);
    await reembed();
}
class Params(traitlets.HasTraits):
    """
    Traitlet container holding a JupyterChart's params.

    One trait is created per entry of ``trait_values``; the trait type is
    chosen from the Python type of the initial value and the trait is then
    set to that value.

    Parameters
    ----------
    trait_values : dict
        Mapping of param name to its initial value.
    """

    def __init__(self, trait_values):
        super().__init__()

        for key, value in trait_values.items():
            # ``bool`` is a subclass of ``int``: it has to be tested before
            # the numeric branch, otherwise boolean params would be typed
            # as Float and come back as 1.0 / 0.0 instead of True / False.
            if isinstance(value, bool):
                traitlet_type = traitlets.Bool()
            elif isinstance(value, (int, float)):
                traitlet_type = traitlets.Float()
            elif isinstance(value, str):
                traitlet_type = traitlets.Unicode()
            elif isinstance(value, list):
                traitlet_type = traitlets.List()
            elif isinstance(value, dict):
                traitlet_type = traitlets.Dict()
            else:
                # Unknown initial type (including ``None``): accept anything.
                traitlet_type = traitlets.Any()

            # Add the new trait.
            self.add_traits(**{key: traitlet_type})

            # Set the trait's value.
            setattr(self, key, value)

    def __repr__(self):
        return f"Params({self.trait_values()})"
@classmethod
def enable_offline(cls, offline: bool = True):
    """
    Switch JupyterChart between offline and online JavaScript loading.

    Parameters
    ----------
    offline: bool
        If True, replace the widget's JavaScript with a self-contained
        bundle produced by vl-convert, so no CDN access is needed.
        If False, restore the stock script, whose dependencies are
        imported from a CDN at runtime (the state the class starts in).
    """
    from altair.utils._importers import import_vl_convert, vl_version_for_vl_convert

    if not offline:
        cls._esm = load_js_src()
        cls._is_offline = False
        return

    if cls._is_offline:
        # Already offline
        return

    vlc = import_vl_convert()

    # Skip the script's preamble: every leading line that is blank, an
    # import statement or a ``//`` comment. The bundler supplies the
    # imports itself.
    lines = load_js_src().split("\n")
    first_kept = 0
    while first_kept < len(lines):
        candidate = lines[first_kept]
        is_blank = len(candidate.strip()) == 0
        if not (is_blank or candidate.startswith(("import", "//"))):
            break
        first_kept += 1
    src = "\n".join(lines[first_kept:])

    # vl-convert's javascript_bundle function creates a self-contained
    # JavaScript bundle for snippets that import from the small set of
    # dependencies vl-convert ships. To list the available imports, run
    #       import vl_convert as vlc
    #       help(vlc.javascript_bundle)
    cls._esm = vlc.javascript_bundle(src, vl_version=vl_version_for_vl_convert())
    cls._is_offline = True
@traitlets.observe("chart")
def _on_change_chart(self, change):  # noqa: C901
    """
    Rebuild the widget's internal state when the wrapped chart changes.

    Walks the chart's top-level params, classifies each one as a selection
    (index, point or interval) or a plain variable param, recreates the
    public ``params`` / ``selections`` containers and finally pushes the
    spec together with the initial param/selection state to the front end.

    Parameters
    ----------
    change
        Traitlets change object; ``change.new`` is the new chart or ``None``.

    Raises
    ------
    ValueError
        If a selection param declares a type other than ``"point"`` or
        ``"interval"``.
    """
    new_chart = change.new
    selection_types = {}
    initial_params = {}
    initial_vl_selections = {}
    empty_selections = {}

    if new_chart is None:
        # No chart: clear everything that is synced to the front end.
        with self.hold_sync():
            self.spec = None
            self._selection_types = selection_types
            self._vl_selections = initial_vl_selections
            self._params = initial_params
        return

    # Iterate the defaulted lookup (not ``new_chart.params``) so that a
    # chart without a ``params`` attribute is treated as having none.
    params = getattr(new_chart, "params", [])
    if params is alt.Undefined:
        params = []

    for param in params:
        if isinstance(param.name, alt.ParameterName):
            clean_name = param.name.to_json().strip('"')
        else:
            clean_name = param.name

        select = getattr(param, "select", alt.Undefined)

        # ``Undefined`` is a sentinel: compare by identity so values with
        # element-wise equality cannot make the test ambiguous.
        if select is alt.Undefined:
            # Plain variable parameter
            value = param.value
            initial_params[clean_name] = None if value is alt.Undefined else value
            continue

        if isinstance(select, str):
            # String shorthand, e.g. ``select="point"``.
            select = {"type": select}
        elif not isinstance(select, dict):
            select = select.to_dict()

        select_type = select["type"]
        if select_type == "point":
            if not (select.get("fields", None) or select.get("encodings", None)):
                # Point selection with no associated fields or encodings
                # specified. This is an index-based selection
                selection_types[clean_name] = "index"
                empty_selections[clean_name] = IndexSelection(
                    name=clean_name, value=[], store=[]
                )
            else:
                selection_types[clean_name] = "point"
                empty_selections[clean_name] = PointSelection(
                    name=clean_name, value=[], store=[]
                )
        elif select_type == "interval":
            selection_types[clean_name] = "interval"
            empty_selections[clean_name] = IntervalSelection(
                name=clean_name, value={}, store=[]
            )
        else:
            # ``select`` is a dict at this point, so report the extracted
            # type rather than a (non-existent) ``.type`` attribute.
            msg = f"Unexpected selection type {select_type}"
            raise ValueError(msg)
        initial_vl_selections[clean_name] = {"value": None, "store": []}

    # Handle the params generated by transforms
    for param_name in collect_transform_params(new_chart):
        initial_params[param_name] = None

    # Setup params
    self.params = Params(initial_params)

    def on_param_traitlet_changed(param_change):
        # Reassign a fresh dict so the change to ``_params`` is detected.
        new_params = dict(self._params)
        new_params[param_change["name"]] = param_change["new"]
        self._params = new_params

    self.params.observe(on_param_traitlet_changed)

    # Setup selections
    self.selections = Selections(empty_selections)

    # Update properties all together
    with self.hold_sync():
        if using_vegafusion():
            if self.local_tz is None:
                # The time zone has not been reported yet: defer the
                # VegaFusion initialisation until ``local_tz`` is set.
                # NOTE(review): a new observer is registered on every chart
                # change that takes this branch and none is removed here.
                self.spec = None

                def on_local_tz_change(change):
                    self._init_with_vegafusion(change["new"])

                self.observe(on_local_tz_change, ["local_tz"])
            else:
                self._init_with_vegafusion(self.local_tz)
        else:
            self.spec = new_chart.to_dict()
        self._selection_types = selection_types
        self._vl_selections = initial_vl_selections
        self._params = initial_params
def collect_transform_params(chart: TopLevelSpec) -> set[str]:
    """
    Gather the names of every param that a transform in ``chart`` defines.

    Parameters
    ----------
    chart:
        Chart to inspect. Sub-charts listed under ``layer``, ``concat``,
        ``hconcat`` and ``vconcat`` are searched recursively.

    Returns
    -------
    set of param names
    """
    names_found = set()

    # Transforms declared directly on this chart
    own_transforms = getattr(chart, "transform", [])
    if own_transforms != alt.Undefined:
        names_found.update(
            tx.param for tx in own_transforms if hasattr(tx, "param")
        )

    # Transforms declared on nested charts
    for container in ("layer", "concat", "hconcat", "vconcat"):
        for sub_chart in getattr(chart, container, []):
            names_found |= collect_transform_params(sub_chart)

    return names_found
RectConfigKwds, ResolveKwds, RowColKwds, ScaleConfigKwds, ScaleInvalidDataConfigKwds, ScaleResolveMapKwds, SelectionConfigKwds, StepKwds, StyleConfigIndexKwds, ThemeConfig, TickConfigKwds, TimeIntervalStepKwds, TimeLocaleKwds, TitleConfigKwds, TitleParamsKwds, TooltipContentKwds, TopLevelSelectionParameterKwds, VariableParameterKwds, ViewBackgroundKwds, ViewConfigKwds, ) from altair.vegalite.v6.theme import themes as _themes if TYPE_CHECKING: import sys from collections.abc import Callable from typing import Any, Literal if sys.version_info >= (3, 11): from typing import LiteralString else: from typing_extensions import LiteralString from altair.utils.plugin_registry import Plugin __all__ = [ "AreaConfigKwds", "AutoSizeParamsKwds", "AxisConfigKwds", "AxisResolveMapKwds", "BarConfigKwds", "BindCheckboxKwds", "BindDirectKwds", "BindInputKwds", "BindRadioSelectKwds", "BindRangeKwds", "BoxPlotConfigKwds", "BrushConfigKwds", "CompositionConfigKwds", "ConfigKwds", "DateTimeKwds", "DerivedStreamKwds", "ErrorBandConfigKwds", "ErrorBarConfigKwds", "FeatureGeometryGeoJsonPropertiesKwds", "FormatConfigKwds", "GeoJsonFeatureCollectionKwds", "GeoJsonFeatureKwds", "GeometryCollectionKwds", "GradientStopKwds", "HeaderConfigKwds", "IntervalSelectionConfigKwds", "IntervalSelectionConfigWithoutTypeKwds", "LegendConfigKwds", "LegendResolveMapKwds", "LegendStreamBindingKwds", "LineConfigKwds", "LineStringKwds", "LinearGradientKwds", "LocaleKwds", "MarkConfigKwds", "MergedStreamKwds", "MultiLineStringKwds", "MultiPointKwds", "MultiPolygonKwds", "NumberLocaleKwds", "OverlayMarkDefKwds", "PaddingKwds", "PointKwds", "PointSelectionConfigKwds", "PointSelectionConfigWithoutTypeKwds", "PolygonKwds", "ProjectionConfigKwds", "ProjectionKwds", "RadialGradientKwds", "RangeConfigKwds", "RectConfigKwds", "ResolveKwds", "RowColKwds", "ScaleConfigKwds", "ScaleInvalidDataConfigKwds", "ScaleResolveMapKwds", "SelectionConfigKwds", "StepKwds", "StyleConfigIndexKwds", "ThemeConfig", "TickConfigKwds", 
"TimeIntervalStepKwds", "TimeLocaleKwds", "TitleConfigKwds", "TitleParamsKwds", "TooltipContentKwds", "TopLevelSelectionParameterKwds", "VariableParameterKwds", "ViewBackgroundKwds", "ViewConfigKwds", "active", "enable", "get", "names", "options", "register", "unregister", ] def register( name: LiteralString, *, enable: bool ) -> Callable[[Plugin[ThemeConfig]], Plugin[ThemeConfig]]: """ Decorator for registering a theme function. Parameters ---------- name Unique name assigned in registry. enable Auto-enable the wrapped theme. Examples -------- Register and enable a theme:: import altair as alt from altair import theme @theme.register("param_font_size", enable=True) def custom_theme() -> theme.ThemeConfig: sizes = 12, 14, 16, 18, 20 return { "autosize": {"contains": "content", "resize": True}, "background": "#F3F2F1", "config": { "axisX": {"labelFontSize": sizes[1], "titleFontSize": sizes[1]}, "axisY": {"labelFontSize": sizes[1], "titleFontSize": sizes[1]}, "font": "'Lato', 'Segoe UI', Tahoma, Verdana, sans-serif", "headerColumn": {"labelFontSize": sizes[1]}, "headerFacet": {"labelFontSize": sizes[1]}, "headerRow": {"labelFontSize": sizes[1]}, "legend": {"labelFontSize": sizes[0], "titleFontSize": sizes[1]}, "text": {"fontSize": sizes[0]}, "title": {"fontSize": sizes[-1]}, }, "height": {"step": 28}, "width": 350, } We can then see the ``name`` parameter displayed when checking:: theme.active "param_font_size" Until another theme has been enabled, all charts will use defaults set in ``custom_theme()``:: from altair.datasets import data source = data.stocks() lines = ( alt.Chart(source, title=alt.Title("Stocks")) .mark_line() .encode(x="date:T", y="price:Q", color="symbol:N") ) lines.interactive(bind_y=False) """ # HACK: See for `LiteralString` requirement in `name` # https://github.com/vega/altair/pull/3526#discussion_r1743350127 def decorate(func: Plugin[ThemeConfig], /) -> Plugin[ThemeConfig]: _register(name, func) if enable: _themes.enable(name) return func 
def unregister(name: LiteralString) -> Plugin[ThemeConfig]:
    """
    Drop a theme from the registry and hand it back to the caller.

    Parameters
    ----------
    name
        Unique name the theme was given in ``alt.theme.register``.

    Raises
    ------
    TypeError
        When no theme called ``name`` is registered.
    """
    removed = _register(name, None)
    if removed is not None:
        return removed

    # Nothing was registered under that name: list what is available.
    registered = names()
    msg = (
        f"Found no theme named {name!r} in registry.\n"
        "Registered themes:\n"
        f"{registered!r}"
    )
    raise TypeError(msg)
"ChannelShape", "ChannelSize", "ChannelStroke", "ChannelStrokeDash", "ChannelStrokeOpacity", "ChannelStrokeWidth", "ChannelText", "ChannelTheta", "ChannelTheta2", "ChannelTooltip", "ChannelUrl", "ChannelX", "ChannelX2", "ChannelXError", "ChannelXError2", "ChannelXOffset", "ChannelY", "ChannelY2", "ChannelYError", "ChannelYError2", "ChannelYOffset", "ChartType", "EncodeKwds", "Optional", "is_chart_type", ] from altair.utils.schemapi import Optional from altair.vegalite.v6.api import ChartType, is_chart_type from altair.vegalite.v6.schema.channels import ( ChannelAngle, ChannelColor, ChannelColumn, ChannelDescription, ChannelDetail, ChannelFacet, ChannelFill, ChannelFillOpacity, ChannelHref, ChannelKey, ChannelLatitude, ChannelLatitude2, ChannelLongitude, ChannelLongitude2, ChannelOpacity, ChannelOrder, ChannelRadius, ChannelRadius2, ChannelRow, ChannelShape, ChannelSize, ChannelStroke, ChannelStrokeDash, ChannelStrokeOpacity, ChannelStrokeWidth, ChannelText, ChannelTheta, ChannelTheta2, ChannelTooltip, ChannelUrl, ChannelX, ChannelX2, ChannelXError, ChannelXError2, ChannelXOffset, ChannelY, ChannelY2, ChannelYError, ChannelYError2, ChannelYOffset, EncodeKwds, ) ================================================ FILE: altair/utils/__init__.py ================================================ from .core import ( SHORTHAND_KEYS, display_traceback, infer_encoding_types, infer_vegalite_type_for_pandas, parse_shorthand, sanitize_narwhals_dataframe, sanitize_pandas_dataframe, update_nested, use_signature, use_signature_func, ) from .deprecation import AltairDeprecationWarning, deprecated, deprecated_warn from .html import spec_to_html from .plugin_registry import PluginRegistry from .schemapi import ( VERSIONS, Optional, SchemaBase, SchemaLike, Undefined, is_undefined, ) __all__ = ( "SHORTHAND_KEYS", "VERSIONS", "AltairDeprecationWarning", "Optional", "PluginRegistry", "SchemaBase", "SchemaLike", "Undefined", "deprecated", "deprecated_warn", "display_traceback", 
class DtypeKind(enum.IntEnum):
    """
    Integer codes identifying the kind of data held by a column.

    Attributes
    ----------
    INT : int
        Signed integer data.
    UINT : int
        Unsigned integer data.
    FLOAT : int
        Floating point data.
    BOOL : int
        Boolean data.
    STRING : int
        String data (UTF-8 encoded).
    DATETIME : int
        Datetime data.
    CATEGORICAL : int
        Categorical data.
    """

    INT = 0
    UINT = 1
    FLOAT = 2
    BOOL = 20
    STRING = 21  # UTF-8
    DATETIME = 22
    CATEGORICAL = 23


# The first element of ``Column.dtype`` is conceptually a ``DtypeKind``, but it
# is typed as ``Any``: other libraries ship their own (identical) enum rather
# than an instance of the one defined here, and type checkers would reject it.
class Column(Protocol):
    @property
    def dtype(self) -> tuple[Any, int, str, str]:
        """
        Describe the dtype as ``(kind, bit-width, format string, endianness)``.

        Bit-width : number of bits, as an integer.
        Format string : data type description in Apache Arrow C Data
            Interface format.
        Endianness : only native endianness (``=``) is currently supported.

        Notes
        -----
        - Kind specifiers follow DLPack where possible; the jump to 20 leaves
          room for future extension.
        - Masks must be boolean with bit width 1 (bit masks) or 8 (byte masks).
        - The width is expressed in bits rather than bytes.
        - Endianness is of limited use today and is included in case
          non-native endianness must be supported later.
        - Apache Arrow format strings were chosen over NumPy ones because they
          are more complete from a dataframe perspective.
        - Format strings matter mostly for datetimes and categoricals.
        - For categoricals the format string describes the type of the values
          in the data buffer. A separate encoding (e.g. an integer to string
          mapping) can be derived from ``self.describe_categorical``.
        - Not covered: complex, Arrow-style null, binary, decimal, and nested
          (list, struct, map, union) dtypes.
        """
        ...

    # The return type is a plain ``Any``: implementers of the interchange
    # protocol do not all declare the usual TypedDict the same way, and since
    # TypedDicts are invariant even a small difference fails type checking.
    # See the PR in which this code was added for details.
    @property
    def describe_categorical(self) -> Any:
        """
        Describe how to interpret the data buffer of a categorical column.

        A categorical column either stores only values in the data buffer, or
        comes with a separate non-categorical Column encoding the categories.

        Raises TypeError if the dtype is not categorical.

        Returns a dictionary with:

            - "is_ordered" : bool, whether the ordering of dictionary indices
                             is semantically meaningful.
            - "is_dictionary" : bool, whether a mapping of categorical values
                                to other objects exists.
            - "categories" : Column holding the (implicit) mapping of indices
                             to category values (e.g. an array of cat1, cat2,
                             ...). None if not a dictionary-style categorical.

        TBD: are there any other in-memory representations that are needed?
        """
        ...


class DataFrame(Protocol):
    """
    Minimal data frame interface required by the interchange protocol.

    A "data frame" is an ordered collection of named columns. Each column
    "name" must be a unique string, and columns may be accessed by name or by
    position.

    An implementer may be a public data frame class itself, or an object
    exposing these methods that is returned from the ``__dataframe__`` method
    of a public data frame class adhering to the interchange specification.
    """

    def __dataframe__(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ) -> DataFrame:
        """
        Construct a new exchange object, potentially changing the parameters.

        ``nan_as_null`` lets the consumer ask the producer to overwrite null
        values in the data with ``NaN``. It is meant for consumers that do not
        support the bit mask or byte mask used natively by the producer.

        ``allow_copy`` states whether the library may copy the data. A copy is
        needed, for example, when a library uses strided buffers, because this
        protocol specifies contiguous buffers.
        """
        ...

    def column_names(self) -> Iterable[str]:
        """Return an iterator yielding the column names."""
        ...

    def get_column_by_name(self, name: str) -> Column:
        """Return the column whose name is the indicated name."""
        ...

    def get_chunks(self, n_chunks: int | None = None) -> Iterable[DataFrame]:
        """
        Return an iterator yielding the chunks.

        With the default (None), the chunks are yielded as stored by the
        producer. Otherwise ``n_chunks`` must be a multiple of
        ``self.num_chunks()``, so the producer subdivides each chunk before
        yielding it.

        The producer must ensure that all columns are chunked the same way.
        """
        ...
def import_vegafusion() -> ModuleType:
    """
    Import and return ``vegafusion`` after checking its installed version.

    Raises
    ------
    RuntimeError
        When the installed version is older than ``VERSIONS["vegafusion"]``.
    ImportError
        When the package is missing or cannot be imported; the message
        carries installation instructions.
    """
    required = VERSIONS["vegafusion"]
    try:
        installed = importlib_version("vegafusion")
        if Version(installed) < Version(required):
            msg = (
                f"The vegafusion package must be version {required} or greater. "
                f"Found version {installed}"
            )
            raise RuntimeError(msg)
        import vegafusion as vf
    except ImportError as err:
        msg = (
            'The "vegafusion" data transformer and chart.transformed_data feature requires\n'
            f"version {required} or greater of the 'vegafusion' package.\n"
            "This can be installed with pip using:\n"
            f' pip install "vegafusion>={required}"\n'
            "or conda:\n"
            f' conda install -c conda-forge "vegafusion>={required}"\n\n'
            f"ImportError: {err.args[0]}"
        )
        raise ImportError(msg) from err
    else:
        return vf


def import_vl_convert() -> ModuleType:
    """
    Import and return ``vl_convert`` after checking its installed version.

    Raises
    ------
    RuntimeError
        When the installed version is older than
        ``VERSIONS["vl-convert-python"]``.
    ImportError
        When the package is missing or cannot be imported; the message
        carries installation instructions.
    """
    required = VERSIONS["vl-convert-python"]
    try:
        installed = importlib_version("vl-convert-python")
        if Version(installed) < Version(required):
            msg = (
                f"The vl-convert-python package must be version {required} or greater. "
                f"Found version {installed}"
            )
            raise RuntimeError(msg)
        import vl_convert as vlc
    except ImportError as err:
        msg = (
            "The vl-convert Vega-Lite compiler and file export feature requires\n"
            f"version {required} or greater of the 'vl-convert-python' package. \n"
            "This can be installed with pip using:\n"
            f' pip install "vl-convert-python>={required}"\n'
            "or conda:\n"
            f' conda install -c conda-forge "vl-convert-python>={required}"\n\n'
            f"ImportError: {err.args[0]}"
        )
        raise ImportError(msg) from err
    else:
        return vlc


def vl_version_for_vl_convert() -> str:
    """Return the VlConvert ``vl_version`` string derived from ``SCHEMA_VERSION``."""
    from altair.vegalite import SCHEMA_VERSION

    # SCHEMA_VERSION has the form 'v5.2.0'; VlConvert expects the form 'v5_2',
    # i.e. only the first two dot-separated components joined by '_'.
    components = SCHEMA_VERSION.split(".")
    return "_".join(components[:2])


def import_pyarrow_interchange() -> ModuleType:
    """
    Import and return ``pyarrow.interchange`` after checking the pyarrow version.

    Raises
    ------
    RuntimeError
        When the installed pyarrow is older than 11.0.0.
    ImportError
        When pyarrow is missing or cannot be imported; the message carries
        installation instructions.
    """
    required = "11.0.0"
    try:
        installed = importlib_version("pyarrow")
        if Version(installed) < Version(required):
            msg = (
                f"The pyarrow package must be version {required} or greater. "
                f"Found version {installed}"
            )
            raise RuntimeError(msg)
        import pyarrow.interchange as pi
    except ImportError as err:
        msg = (
            "Usage of the DataFrame Interchange Protocol requires\n"
            f"version {required} or greater of the pyarrow package. \n"
            "This can be installed with pip using:\n"
            f' pip install "pyarrow>={required}"\n'
            "or conda:\n"
            f' conda install -c conda-forge "pyarrow>={required}"\n\n'
            f"ImportError: {err.args[0]}"
        )
        raise ImportError(msg) from err
    else:
        return pi


def pyarrow_available() -> bool:
    """Return whether a sufficiently recent pyarrow can be imported."""
    try:
        import_pyarrow_interchange()
    except (ImportError, RuntimeError):
        # Missing package and too-old package both count as "not available".
        return False
    else:
        return True
def open_html_in_browser(
    html: str | bytes,
    using: str | Iterable[str] | None = None,
    port: int | None = None,
) -> None:
    """
    Display an html document in a web browser without creating a temp file.

    Starts a simple http server on 127.0.0.1, opens its URL with the
    ``webbrowser`` module, serves exactly one request and then releases the
    listening socket.

    Parameters
    ----------
    html: str or bytes
        HTML document to display. A ``str`` is encoded as UTF-8; ``bytes`` are
        sent as they are.
    using: str or iterable of str
        Name of the web browser to open (e.g. "chrome", "firefox", etc.).
        If an iterable, choose the first browser available on the system.
        If none, choose the system default browser.
    port: int
        Port to use. Defaults to a random port

    Raises
    ------
    ValueError
        When none of the browsers named in ``using`` can be located.
    """
    # Encode html to bytes
    html_bytes = html.encode("utf8") if isinstance(html, str) else html

    browser = None

    if using is None:
        browser = webbrowser.get(None)
    else:
        # normalize using to an iterable
        if isinstance(using, str):
            using = [using]

        for browser_key in using:
            try:
                browser = webbrowser.get(browser_key)
            except webbrowser.Error:
                # This candidate is not installed; try the next one.
                continue
            if browser is not None:
                break

    if browser is None:
        raise ValueError("Failed to locate a browser with name in " + str(using))

    class OneShotRequestHandler(BaseHTTPRequestHandler):
        def do_GET(self) -> None:
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()

            # Write the document in 1 MiB slices instead of one large write.
            chunk_size = 1024 * 1024
            for start in range(0, len(html_bytes), chunk_size):
                self.wfile.write(html_bytes[start : start + chunk_size])

        def log_message(self, format: str, *args: object) -> None:
            # Silence stderr logging
            pass

    # Use specified port if provided, otherwise choose a random port (port value of 0)
    address = ("127.0.0.1", port if port is not None else 0)
    # The context manager closes the listening socket once the single request
    # has been handled, and also when opening the browser fails.
    with HTTPServer(address, OneShotRequestHandler) as server:
        browser.open(f"http://127.0.0.1:{server.server_port}")
        server.handle_request()
# For the transformed_data functionality, every class listed in a value tuple
# is treated as equivalent to the chart class used as its key.
_chart_class_mapping = {
    Chart: (
        Chart,
        TopLevelUnitSpec,
        FacetedUnitSpec,
        UnitSpec,
        UnitSpecWithFrame,
        NonNormalizedSpec,
    ),
    LayerChart: (LayerChart, TopLevelLayerSpec, LayerSpec),
    ConcatChart: (ConcatChart, TopLevelConcatSpec, ConcatSpecGenericSpec),
    HConcatChart: (HConcatChart, TopLevelHConcatSpec, HConcatSpecGenericSpec),
    VConcatChart: (VConcatChart, TopLevelVConcatSpec, VConcatSpecGenericSpec),
    FacetChart: (FacetChart, TopLevelFacetSpec, FacetSpec),
}


@overload
def transformed_data(
    chart: Chart | FacetChart,
    row_limit: int | None = None,
    exclude: Iterable[str] | None = None,
) -> DataFrameLike | None: ...


@overload
def transformed_data(
    chart: LayerChart | HConcatChart | VConcatChart | ConcatChart,
    row_limit: int | None = None,
    exclude: Iterable[str] | None = None,
) -> list[DataFrameLike]: ...


def transformed_data(chart, row_limit=None, exclude=None):
    """
    Evaluate a Chart's transforms.

    Runs the data transforms attached to a chart through VegaFusion and
    returns the transformed data as one or more DataFrames.

    Parameters
    ----------
    chart : Chart, FacetChart, LayerChart, HConcatChart, VConcatChart, or ConcatChart
        Altair chart to evaluate transforms on
    row_limit : int (optional)
        Maximum number of rows to return for each DataFrame. None (default) for unlimited
    exclude : iterable of str
        Set of the names of charts to exclude

    Returns
    -------
    DataFrame or list of DataFrames or None
        For a Chart or FacetChart, the DataFrame of transformed data, or None
        when no dataset was produced for it (e.g. the chart was excluded).
        For any other chart type, a list of DataFrames.

    Raises
    ------
    ValueError
        When a named view cannot be matched to a dataset in the compiled
        Vega specification.
    """
    vf = import_vegafusion()

    # Vega-Lite requires a mark, so fall back to a point mark if none is set.
    if isinstance(chart, Chart) and chart.mark == Undefined:
        chart = chart.mark_point()

    # Work on a deep copy: views get renamed below and the caller's chart
    # must stay untouched.
    chart = chart.copy(deep=True)

    # Every view needs a name so it can be found in the compiled Vega spec.
    view_names = name_views(chart, 0, exclude=exclude)

    # Compile to Vega and pull out the DataFrames stashed by the transformer.
    with data_transformers.enable("vegafusion"):
        vega_spec = chart.to_dict(format="vega", context={"pre_transform": False})
        inline_datasets = get_inline_tables(vega_spec)

    # Map view names to the Vega datasets feeding them.
    facet_mapping = get_facet_mapping(vega_spec)
    dataset_mapping = get_datasets_for_view_names(vega_spec, view_names, facet_mapping)

    # The dataset list follows the order of the chart components.
    if any(view_name not in dataset_mapping for view_name in view_names):
        msg = "Failed to locate all datasets"
        raise ValueError(msg)
    dataset_names = [dataset_mapping[view_name] for view_name in view_names]

    # Extract transformed datasets with VegaFusion
    datasets, _ = vf.runtime.pre_transform_datasets(
        vega_spec,
        dataset_names,
        row_limit=row_limit,
        inline_datasets=inline_datasets,
    )

    if not isinstance(chart, (Chart, FacetChart)):
        # Composite charts get the full list of DataFrames.
        return datasets
    # Simple charts get a single DataFrame, or None if nothing was produced.
    return datasets[0] if datasets else None
def _assign_chart_name(chart: ChartType) -> None:
    """Assign a name to a chart if it doesn't have one."""
    if chart.name not in {None, Undefined}:
        return

    # Altair chart objects provide their own hash-based view name.
    if hasattr(chart, "_get_view_hash_name"):
        chart.name = chart._get_view_hash_name()
        return

    # Schema objects (UnitSpec, FacetedUnitSpec, etc.) get a name built from a
    # shortened, lower-cased class name plus the object id for uniqueness.
    chart_type = (
        chart.__class__.__name__.lower()
        .replace("spec", "")
        .replace("generic", "")
        .replace("concat", "")
    )
    chart_type = chart_type.removesuffix("_")
    chart.name = f"view_{chart_type}_{id(chart):x}"


def _get_subcharts(chart: ChartType) -> list[Any]:
    """
    Get the subcharts for a composite chart.

    Raises
    ------
    ValueError
        When ``chart`` is not a layer or concat style chart.
    """
    if isinstance(chart, _chart_class_mapping[LayerChart]):
        return chart.layer
    if isinstance(chart, _chart_class_mapping[HConcatChart]):
        return chart.hconcat
    if isinstance(chart, _chart_class_mapping[VConcatChart]):
        return chart.vconcat
    if isinstance(chart, _chart_class_mapping[ConcatChart]):
        return chart.concat
    msg = (
        "transformed_data accepts an instance of "
        "Chart, FacetChart, LayerChart, HConcatChart, VConcatChart, or ConcatChart\n"
        f"Received value of type: {type(chart)}"
    )
    raise ValueError(msg)


def name_views(
    chart: ChartType, i: int = 0, exclude: Iterable[str] | None = None
) -> list[str]:
    """
    Name unnamed chart views.

    Name unnamed charts views so that we can look them up later in
    the compiled Vega spec.

    Note: This function mutates the input chart by applying names to
    unnamed views.

    Parameters
    ----------
    chart : Chart, FacetChart, LayerChart, HConcatChart, VConcatChart, or ConcatChart
        Altair chart to apply names to
    i : int (default 0)
        Starting chart index
    exclude : iterable of str
        Names of charts to exclude

    Returns
    -------
    list of str
        List of the names of the charts and subcharts
    """
    exclude = set(exclude) if exclude is not None else set()

    # Simple charts (Chart and FacetChart) are the leaves of the recursion.
    if isinstance(
        chart, (_chart_class_mapping[Chart], _chart_class_mapping[FacetChart])
    ):
        # Exclusion is decided on the name the chart already carries, before
        # any generated name is assigned.
        if chart.name in exclude:
            return []
        _assign_chart_name(chart)
        return [chart.name]

    # Composite charts: collect names from every subchart, in order.
    chart_names: list[str] = []
    for subchart in _get_subcharts(chart):
        chart_names.extend(
            name_views(subchart, i=i + len(chart_names), exclude=exclude)
        )
    return chart_names


def get_group_mark_for_scope(
    vega_spec: dict[str, Any], scope: Scope
) -> dict[str, Any] | None:
    """
    Get the group mark at a particular scope.

    Parameters
    ----------
    vega_spec : dict
        Top-level Vega specification dictionary
    scope : tuple of int
        Scope tuple. If empty, the original Vega specification is returned.
        Otherwise, the nested group mark at the scope specified is returned.
        Each entry is an index that counts only marks of type "group".

    Returns
    -------
    dict or None
        Top-level Vega spec (if scope is empty)
        or group mark (if scope is non-empty)
        or None (if group mark at scope does not exist)

    Examples
    --------
    >>> spec = {
    ...     "marks": [
    ...         {"type": "group", "marks": [{"type": "symbol"}]},
    ...         {"type": "group", "marks": [{"type": "rect"}]},
    ...     ]
    ... }
    >>> get_group_mark_for_scope(spec, (1,))
    {'type': 'group', 'marks': [{'type': 'rect'}]}
    """
    group = vega_spec

    for scope_value in scope:
        nested_groups = (
            mark for mark in group.get("marks", []) if mark.get("type") == "group"
        )
        # Select by position among group marks only; an index that is out of
        # range (or negative) simply matches nothing.
        child_group = next(
            (
                mark
                for position, mark in enumerate(nested_groups)
                if position == scope_value
            ),
            None,
        )
        if child_group is None:
            return None
        group = child_group

    return group


def get_datasets_for_scope(vega_spec: dict[str, Any], scope: Scope) -> list[str]:
    """
    Get the names of the datasets that are defined at a given scope.

    Parameters
    ----------
    vega_spec : dict
        Top-level Vega specification
    scope : tuple of int
        Scope tuple. If empty, the names of top-level datasets are returned
        Otherwise, the names of the datasets defined in the nested group mark
        at the specified scope are returned.

    Returns
    -------
    list of str
        List of the names of the datasets defined at the specified scope,
        followed by the name of the group's facet dataset when it has one.

    Examples
    --------
    >>> spec = {
    ...     "data": [{"name": "data1"}],
    ...     "marks": [
    ...         {
    ...             "type": "group",
    ...             "data": [{"name": "data2"}],
    ...             "marks": [{"type": "symbol"}],
    ...         },
    ...         {
    ...             "type": "group",
    ...             "data": [
    ...                 {"name": "data3"},
    ...                 {"name": "data4"},
    ...             ],
    ...             "marks": [{"type": "rect"}],
    ...         },
    ...     ],
    ... }

    >>> get_datasets_for_scope(spec, ())
    ['data1']

    >>> get_datasets_for_scope(spec, (0,))
    ['data2']

    >>> get_datasets_for_scope(spec, (1,))
    ['data3', 'data4']

    Returns empty when no group mark exists at scope
    >>> get_datasets_for_scope(spec, (1, 3))
    []
    """
    group = get_group_mark_for_scope(vega_spec, scope) or {}

    # Datasets declared directly on the group
    datasets = [dataset["name"] for dataset in group.get("data", [])]

    # A faceted group also exposes its facet dataset by name
    facet_dataset = group.get("from", {}).get("facet", {}).get("name", None)
    if facet_dataset:
        datasets.append(facet_dataset)
    return datasets


def get_definition_scope_for_data_reference(
    vega_spec: dict[str, Any], data_name: str, usage_scope: Scope
) -> Scope | None:
    """
    Return the scope that a dataset is defined at, for a given usage scope.

    The usage scope itself is checked first, then each enclosing scope up to
    the top level.

    Parameters
    ----------
    vega_spec: dict
        Top-level Vega specification
    data_name: str
        The name of a dataset reference
    usage_scope: tuple of int
        The scope that the dataset is referenced in

    Returns
    -------
    tuple of int
        The scope where the referenced dataset is defined,
        or None if no such dataset is found

    Examples
    --------
    >>> spec = {
    ...     "data": [{"name": "data1"}],
    ...     "marks": [
    ...         {
    ...             "type": "group",
    ...             "data": [{"name": "data2"}],
    ...             "marks": [
    ...                 {
    ...                     "type": "symbol",
    ...                     "encode": {
    ...                         "update": {
    ...                             "x": {"field": "x", "data": "data1"},
    ...                             "y": {"field": "y", "data": "data2"},
    ...                         }
    ...                     },
    ...                 }
    ...             ],
    ...         }
    ...     ],
    ... }

    data1 is referenced at scope [0] and defined at scope []
    >>> get_definition_scope_for_data_reference(spec, "data1", (0,))
    ()

    data2 is referenced at scope [0] and defined at scope [0]
    >>> get_definition_scope_for_data_reference(spec, "data2", (0,))
    (0,)

    data2 is not visible at scope [] (the top level),
    because it's defined in scope [0]
    >>> repr(get_definition_scope_for_data_reference(spec, "data2", ()))
    'None'
    """
    # Walk from the innermost scope outwards.
    for depth in range(len(usage_scope), -1, -1):
        candidate_scope = usage_scope[:depth]
        if data_name in get_datasets_for_scope(vega_spec, candidate_scope):
            return candidate_scope
    return None


def get_facet_mapping(group: dict[str, Any], scope: Scope = ()) -> FacetMapping:
    """
    Create mapping from facet definitions to source datasets.

    Parameters
    ----------
    group : dict
        Top-level Vega spec or nested group mark
    scope : tuple of int
        Scope of the group dictionary within a top-level Vega spec

    Returns
    -------
    dict
        Dictionary from (facet_name, facet_scope) to (dataset_name, dataset_scope)

    Examples
    --------
    >>> spec = {
    ...     "data": [{"name": "data1"}],
    ...     "marks": [
    ...         {
    ...             "type": "group",
    ...             "from": {
    ...                 "facet": {
    ...                     "name": "facet1",
    ...                     "data": "data1",
    ...                     "groupby": ["colA"],
    ...                 }
    ...             },
    ...         }
    ...     ],
    ... }
    >>> get_facet_mapping(spec)
    {('facet1', (0,)): ('data1', ())}
    """
    facet_mapping: FacetMapping = {}
    # Scope indices count group marks only, so track them separately from the
    # position in the marks list.
    group_index = 0
    mark_group = get_group_mark_for_scope(group, scope) or {}
    for mark in mark_group.get("marks", []):
        if mark.get("type", None) != "group":
            continue

        group_scope = (*scope, group_index)
        facet = mark.get("from", {}).get("facet", None)
        if facet is not None:
            facet_name = facet.get("name", None)
            facet_data = facet.get("data", None)
            if facet_name is not None and facet_data is not None:
                # The facet's source is referenced from the enclosing scope.
                definition_scope = get_definition_scope_for_data_reference(
                    group, facet_data, scope
                )
                if definition_scope is not None:
                    facet_mapping[facet_name, group_scope] = (
                        facet_data,
                        definition_scope,
                    )

        # Handle children recursively
        facet_mapping.update(get_facet_mapping(group, scope=group_scope))
        group_index += 1

    return facet_mapping


def get_from_facet_mapping(
    scoped_dataset: tuple[str, Scope], facet_mapping: FacetMapping
) -> tuple[str, Scope]:
    """
    Apply facet mapping to a scoped dataset.

    Parameters
    ----------
    scoped_dataset : (str, tuple of int)
        A dataset name and scope tuple
    facet_mapping : dict from (str, tuple of int) to (str, tuple of int)
        The facet mapping produced by get_facet_mapping

    Returns
    -------
    (str, tuple of int)
        Dataset name and scope tuple that has been mapped as many times as possible

    Examples
    --------
    Facet mapping as produced by get_facet_mapping
    >>> facet_mapping = {
    ...     ("facet1", (0,)): ("data1", ()),
    ...     ("facet2", (0, 1)): ("facet1", (0,)),
    ... }
    >>> get_from_facet_mapping(("facet2", (0, 1)), facet_mapping)
    ('data1', ())
    """
    # Follow the chain of facets until a dataset that is not a facet is reached.
    while scoped_dataset in facet_mapping:
        scoped_dataset = facet_mapping[scoped_dataset]
    return scoped_dataset


def get_datasets_for_view_names(
    group: dict[str, Any],
    vl_chart_names: list[str],
    facet_mapping: FacetMapping,
    scope: Scope = (),
) -> dict[str, tuple[str, Scope]]:
    """
    Get the Vega datasets that correspond to the provided Altair view names.

    Parameters
    ----------
    group : dict
        Top-level Vega spec or nested group mark
    vl_chart_names : list of str
        List of the Vega-Lite view names to locate
    facet_mapping : dict from (str, tuple of int) to (str, tuple of int)
        The facet mapping produced by get_facet_mapping
    scope : tuple of int
        Scope of the group dictionary within a top-level Vega spec

    Returns
    -------
    dict from str to (str, tuple of int)
        Dict from Altair view names to scoped datasets. Views whose dataset
        cannot be determined are left out.
    """
    datasets: dict[str, tuple[str, Scope]] = {}
    group_index = 0
    mark_group = get_group_mark_for_scope(group, scope) or {}
    for mark in mark_group.get("marks", []):
        name = mark.get("name", "")

        # A "<view>_cell" group is the cell of a faceted view; its data is the
        # source of the facet definition.
        for vl_chart_name in vl_chart_names:
            if name == f"{vl_chart_name}_cell":
                # Tolerate a cell group without a facet definition instead of
                # failing on ``None.get``; such a view is simply not recorded.
                facet = (mark.get("from") or {}).get("facet") or {}
                data_name = facet.get("data", None)
                if data_name is not None:
                    datasets[vl_chart_name] = get_from_facet_mapping(
                        (data_name, scope), facet_mapping
                    )
                break

        if mark.get("type", "") == "group":
            group_data_names = get_datasets_for_view_names(
                group, vl_chart_names, facet_mapping, scope=(*scope, group_index)
            )
            # Entries found at this level win over those found in nested groups.
            for view_name, scoped_dataset in group_data_names.items():
                datasets.setdefault(view_name, scoped_dataset)
            group_index += 1
        else:
            for vl_chart_name in vl_chart_names:
                if name.startswith(vl_chart_name) and name.endswith("_marks"):
                    data_name = mark.get("from", {}).get("data", None)
                    definition_scope = get_definition_scope_for_data_reference(
                        group, data_name, scope
                    )
                    if definition_scope is not None:
                        datasets[vl_chart_name] = get_from_facet_mapping(
                            (data_name, definition_scope), facet_mapping
                        )
                        break

    return datasets
# Holding area for dataframes that the vegafusion data transformer has pulled
# out of charts. A WeakValueDictionary is used instead of a dict so that the
# interpreter remains free to garbage collect the stored DataFrames.
extracted_inline_tables: MutableMapping[str, DataFrameLike] = WeakValueDictionary()

# URL prefix by which VegaFusion recognises that a dataset in a Vega spec
# refers to an entry of the `inline_datasets` kwarg of
# vf.runtime.pre_transform_spec().
VEGAFUSION_PREFIX: Final = "vegafusion+dataset://"


try:
    VEGAFUSION_VERSION: Version | None = Version(importlib_version("vegafusion"))
except ImportError:
    VEGAFUSION_VERSION = None


if VEGAFUSION_VERSION is None or VEGAFUSION_VERSION < Version("2.0.0a0"):

    def is_supported_by_vf(data: Any) -> TypeIs[DataFrameLike]:
        # Without VegaFusion v2 only DataFrameLike objects are accepted
        return isinstance(data, DataFrameLike)

else:

    def is_supported_by_vf(data: Any) -> TypeIs[DataFrameLike]:
        # Test whether VegaFusion supports the data type
        # VegaFusion v2 support narwhals-compatible DataFrames
        return isinstance(data, DataFrameLike) or is_into_dataframe(data)


class _ToVegaFusionReturnUrlDict(TypedDict):
    url: str


_VegaFusionReturnType = _ToVegaFusionReturnUrlDict | ToValuesReturnType


@overload
def vegafusion_data_transformer(
    data: None = ..., max_rows: int = ...
) -> Callable[..., Any]: ...


@overload
def vegafusion_data_transformer(
    data: DataFrameLike, max_rows: int = ...
) -> ToValuesReturnType: ...


@overload
def vegafusion_data_transformer(
    data: dict | IntoDataFrame | SupportsGeoInterface, max_rows: int = ...
) -> _VegaFusionReturnType: ...


def vegafusion_data_transformer(
    data: DataType | None = None, max_rows: int = 100000
) -> Callable[..., Any] | _VegaFusionReturnType:
    """
    VegaFusion Data Transformer.

    Called without data, returns the transformer itself. Data that VegaFusion
    supports is stashed in ``extracted_inline_tables`` and replaced by a
    ``{"url": ...}`` reference to it; anything else is handed to the default
    data transformer.
    """
    if data is None:
        return vegafusion_data_transformer

    if not is_supported_by_vf(data) or isinstance(data, SupportsGeoInterface):
        # Geo interface objects (e.g. a geopandas GeoDataFrame) and data types
        # we don't recognize go through the default transformer.
        return default_data_transformer(data)

    table_name = f"table_{uuid.uuid4()}".replace("-", "_")
    extracted_inline_tables[table_name] = data
    return {"url": VEGAFUSION_PREFIX + table_name}


def get_inline_table_names(vega_spec: dict[str, Any]) -> set[str]:
    """
    Get a set of the inline datasets names in the provided Vega spec.

    Inline datasets are encoded as URLs that start with ``VEGAFUSION_PREFIX``
    (``vegafusion+dataset://``).

    Parameters
    ----------
    vega_spec: dict
        A Vega specification dict

    Returns
    -------
    set of str
        Set of the names of the inline datasets that are referenced
        in the specification.

    Examples
    --------
    >>> spec = {
    ...     "data": [
    ...         {"name": "foo", "url": "https://path/to/file.csv"},
    ...         {"name": "bar", "url": "vegafusion+dataset://inline_dataset_123"},
    ...     ]
    ... }
    >>> get_inline_table_names(spec)
    {'inline_dataset_123'}
    """
    prefix_length = len(VEGAFUSION_PREFIX)
    urls = (dataset.get("url", "") for dataset in vega_spec.get("data", []))
    table_names = {
        url[prefix_length:] for url in urls if url.startswith(VEGAFUSION_PREFIX)
    }

    # Child marks may declare datasets of their own, so descend into them.
    for mark in vega_spec.get("marks", []):
        table_names |= get_inline_table_names(mark)

    return table_names


def get_inline_tables(vega_spec: dict[str, Any]) -> dict[str, DataFrameLike]:
    """
    Get the inline tables referenced by a Vega specification.

    Note: This function should only be called on a Vega spec that corresponds
    to a chart that was processed by the vegafusion_data_transformer.
    Furthermore, this function may only be called once per spec because
    the returned dataframes are deleted from internal storage.

    Parameters
    ----------
    vega_spec: dict
        A Vega specification dict

    Returns
    -------
    dict from str to dataframe
        dict from inline dataset name to dataframe object
    """
    referenced = get_inline_table_names(vega_spec)
    # Keep only names that are still held in internal storage: this drops
    # named datasets provided by the user and dataframes that were deleted.
    available = referenced.intersection(extracted_inline_tables)
    tables: dict[str, DataFrameLike] = {}
    for table_name in available:
        tables[table_name] = extracted_inline_tables.pop(table_name)
    return tables
'America/New_York') Returns ------- ChartState A VegaFusion ChartState object """ # Local import to avoid circular ImportError from altair import data_transformers, vegalite_compilers vf = import_vegafusion() # Compile Vega-Lite spec to Vega compiler = vegalite_compilers.get() if compiler is None: msg = "No active vega-lite compiler plugin found" raise ValueError(msg) vega_spec = compiler(vegalite_spec) # Retrieve dict of inline tables referenced by the spec inline_tables = get_inline_tables(vega_spec) # Pre-evaluate transforms in vega spec with vegafusion row_limit = data_transformers.options.get("max_rows", None) chart_state = vf.runtime.new_chart_state( vega_spec, local_tz=local_tz, inline_datasets=inline_tables, row_limit=row_limit, ) # Check from row limit warning and convert to MaxRowsError handle_row_limit_exceeded(row_limit, chart_state.get_warnings()) return chart_state def compile_with_vegafusion(vegalite_spec: dict[str, Any]) -> dict[str, Any]: """ Compile a Vega-Lite spec to Vega and pre-transform with VegaFusion. Note: This function should only be called on a Vega-Lite spec that was generated with the "vegafusion" data transformer enabled. In particular, this spec may contain references to extract datasets using table:// prefixed URLs. 
Parameters ---------- vegalite_spec: dict A Vega-Lite spec that was generated from an Altair chart with the "vegafusion" data transformer enabled Returns ------- dict A Vega spec that has been pre-transformed by VegaFusion """ # Local import to avoid circular ImportError from altair import data_transformers, vegalite_compilers vf = import_vegafusion() # Compile Vega-Lite spec to Vega compiler = vegalite_compilers.get() if compiler is None: msg = "No active vega-lite compiler plugin found" raise ValueError(msg) vega_spec = compiler(vegalite_spec) # Retrieve dict of inline tables referenced by the spec inline_tables = get_inline_tables(vega_spec) # Pre-evaluate transforms in vega spec with vegafusion row_limit = data_transformers.options.get("max_rows", None) transformed_vega_spec, warnings = vf.runtime.pre_transform_spec( vega_spec, vf.get_local_tz(), inline_datasets=inline_tables, row_limit=row_limit, ) # Check from row limit warning and convert to MaxRowsError handle_row_limit_exceeded(row_limit, warnings) return transformed_vega_spec def handle_row_limit_exceeded(row_limit: int | None, warnings: list): for warning in warnings: if warning.get("type") == "RowLimitExceeded": msg = ( "The number of dataset rows after filtering and aggregation exceeds\n" f"the current limit of {row_limit}. Try adding an aggregation to reduce\n" "the size of the dataset that must be loaded into the browser. Or, disable\n" "the limit by calling alt.data_transformers.disable_max_rows(). Note that\n" "disabling this limit may cause the browser to freeze or crash." 
) raise MaxRowsError(msg) def using_vegafusion() -> bool: """Check whether the vegafusion data transformer is enabled.""" # Local import to avoid circular ImportError from altair import data_transformers return data_transformers.active == "vegafusion" ================================================ FILE: altair/utils/compiler.py ================================================ from collections.abc import Callable from typing import Any from altair.utils import PluginRegistry # ============================================================================== # Vega-Lite to Vega compiler registry # ============================================================================== VegaLiteCompilerType = Callable[[dict[str, Any]], dict[str, Any]] class VegaLiteCompilerRegistry(PluginRegistry[VegaLiteCompilerType, dict[str, Any]]): pass ================================================ FILE: altair/utils/core.py ================================================ """Utility routines.""" from __future__ import annotations import itertools import json import re import sys import traceback import warnings from collections.abc import Callable, Iterator, Mapping, MutableMapping from copy import deepcopy from itertools import groupby from operator import itemgetter from typing import ( TYPE_CHECKING, Any, Concatenate, Literal, ParamSpec, TypeVar, cast, overload, ) import jsonschema import narwhals.stable.v1 as nw from narwhals.stable.v1.dependencies import is_pandas_dataframe, is_polars_dataframe from narwhals.stable.v1.typing import IntoDataFrame from altair.utils.schemapi import SchemaBase, SchemaLike, Undefined if sys.version_info >= (3, 12): from typing import Protocol, TypeAliasType, runtime_checkable else: from typing_extensions import Protocol, TypeAliasType, runtime_checkable if TYPE_CHECKING: import pandas as pd from narwhals.stable.v1.typing import IntoExpr from altair.utils._dfi_types import DataFrame as DfiDataFrame from altair.vegalite.v6.schema._typing import 
StandardType_T as InferredVegaLiteType _PandasDataFrameT = TypeVar("_PandasDataFrameT", bound="pd.DataFrame") TIntoDataFrame = TypeVar("TIntoDataFrame", bound=IntoDataFrame) T = TypeVar("T") P = ParamSpec("P") R = TypeVar("R") WrapsFunc = TypeAliasType("WrapsFunc", Callable[..., R], type_params=(R,)) WrappedFunc = TypeAliasType("WrappedFunc", Callable[P, R], type_params=(P, R)) # NOTE: Requires stringized form to avoid `< (3, 11)` issues # See: https://github.com/vega/altair/actions/runs/10667859416/job/29567290871?pr=3565 WrapsMethod = TypeAliasType( "WrapsMethod", "Callable[Concatenate[T, ...], R]", type_params=(T, R) ) WrappedMethod = TypeAliasType( "WrappedMethod", Callable[Concatenate[T, P], R], type_params=(T, P, R) ) @runtime_checkable class DataFrameLike(Protocol): def __dataframe__( self, nan_as_null: bool = False, allow_copy: bool = True ) -> DfiDataFrame: ... TYPECODE_MAP = { "ordinal": "O", "nominal": "N", "quantitative": "Q", "temporal": "T", "geojson": "G", } INV_TYPECODE_MAP = {v: k for k, v in TYPECODE_MAP.items()} # aggregates from vega-lite version 4.6.0 AGGREGATES = [ "argmax", "argmin", "average", "count", "distinct", "max", "mean", "median", "min", "missing", "product", "q1", "q3", "ci0", "ci1", "stderr", "stdev", "stdevp", "sum", "valid", "values", "variance", "variancep", "exponential", "exponentialb", ] # window aggregates from vega-lite version 4.6.0 WINDOW_AGGREGATES = [ "row_number", "rank", "dense_rank", "percent_rank", "cume_dist", "ntile", "lag", "lead", "first_value", "last_value", "nth_value", ] # timeUnits from vega-lite version 4.17.0 TIMEUNITS = [ "year", "quarter", "month", "week", "day", "dayofyear", "date", "hours", "minutes", "seconds", "milliseconds", "yearquarter", "yearquartermonth", "yearmonth", "yearmonthdate", "yearmonthdatehours", "yearmonthdatehoursminutes", "yearmonthdatehoursminutesseconds", "yearweek", "yearweekday", "yearweekdayhours", "yearweekdayhoursminutes", "yearweekdayhoursminutesseconds", "yeardayofyear", 
"quartermonth", "monthdate", "monthdatehours", "monthdatehoursminutes", "monthdatehoursminutesseconds", "weekday", "weeksdayhours", "weekdayhours", "weekdayhoursminutes", "weekdayhoursminutesseconds", "dayhours", "dayhoursminutes", "dayhoursminutesseconds", "hoursminutes", "hoursminutesseconds", "minutesseconds", "secondsmilliseconds", "utcyear", "utcquarter", "utcmonth", "utcweek", "utcday", "utcdayofyear", "utcdate", "utchours", "utcminutes", "utcseconds", "utcmilliseconds", "utcyearquarter", "utcyearquartermonth", "utcyearmonth", "utcyearmonthdate", "utcyearmonthdatehours", "utcyearmonthdatehoursminutes", "utcyearmonthdatehoursminutesseconds", "utcyearweek", "utcyearweekday", "utcyearweekdayhours", "utcyearweekdayhoursminutes", "utcyearweekdayhoursminutesseconds", "utcyeardayofyear", "utcquartermonth", "utcmonthdate", "utcmonthdatehours", "utcmonthdatehoursminutes", "utcmonthdatehoursminutesseconds", "utcweekday", "utcweekdayhours", "utcweekdayhoursminutes", "utcweekdayhoursminutesseconds", "utcdayhours", "utcdayhoursminutes", "utcdayhoursminutesseconds", "utchoursminutes", "utchoursminutesseconds", "utcminutesseconds", "utcsecondsmilliseconds", ] VALID_TYPECODES = list(itertools.chain(iter(TYPECODE_MAP), iter(INV_TYPECODE_MAP))) SHORTHAND_UNITS = { "field": "(?P.*)", "type": "(?P{})".format("|".join(VALID_TYPECODES)), "agg_count": "(?Pcount)", "op_count": "(?Pcount)", "aggregate": "(?P{})".format("|".join(AGGREGATES)), "window_op": "(?P{})".format("|".join(AGGREGATES + WINDOW_AGGREGATES)), "timeUnit": "(?P{})".format("|".join(TIMEUNITS)), } SHORTHAND_KEYS: frozenset[Literal["field", "aggregate", "type", "timeUnit"]] = ( frozenset(("field", "aggregate", "type", "timeUnit")) ) def infer_vegalite_type_for_pandas( data: Any, ) -> InferredVegaLiteType | tuple[InferredVegaLiteType, list[Any]]: """ From an array-like input, infer the correct vega typecode. ('ordinal', 'nominal', 'quantitative', or 'temporal'). 
Parameters ---------- data: Any """ # This is safe to import here, as this function is only called on pandas input. from pandas.api.types import infer_dtype typ = infer_dtype(data, skipna=False) if typ in { "floating", "mixed-integer-float", "integer", "mixed-integer", "complex", }: return "quantitative" elif typ == "categorical" and hasattr(data, "cat") and data.cat.ordered: return ("ordinal", data.cat.categories.tolist()) elif typ in {"string", "bytes", "categorical", "boolean", "mixed", "unicode"}: return "nominal" elif typ in { "datetime", "datetime64", "timedelta", "timedelta64", "date", "time", "period", }: return "temporal" else: warnings.warn( f"I don't know how to infer vegalite type from '{typ}'. " "Defaulting to nominal.", stacklevel=1, ) return "nominal" def merge_props_geom(feat: dict[str, Any]) -> dict[str, Any]: """ Merge properties with geometry. * Overwrites 'type' and 'geometry' entries if existing. """ geom = {k: feat[k] for k in ("type", "geometry")} try: feat["properties"].update(geom) props_geom = feat["properties"] except (AttributeError, KeyError): # AttributeError when 'properties' equals None # KeyError when 'properties' is non-existing props_geom = geom return props_geom def sanitize_geo_interface(geo: MutableMapping[Any, Any]) -> dict[str, Any]: """ Sanitize a geo_interface to prepare it for serialization. 
* Make a copy * Convert type array or _Array to list * Convert tuples to lists (using json.loads/dumps) * Merge properties with geometry """ geo = deepcopy(geo) # convert type _Array or array to list for key in geo: if str(type(geo[key]).__name__).startswith(("_Array", "array")): geo[key] = geo[key].tolist() # convert (nested) tuples to lists geo_dct: dict = json.loads(json.dumps(geo)) # sanitize features if geo_dct["type"] == "FeatureCollection": geo_dct = geo_dct["features"] if len(geo_dct) > 0: for idx, feat in enumerate(geo_dct): geo_dct[idx] = merge_props_geom(feat) elif geo_dct["type"] == "Feature": geo_dct = merge_props_geom(geo_dct) else: geo_dct = {"type": "Feature", "geometry": geo_dct} return geo_dct def numpy_is_subtype(dtype: Any, subtype: Any) -> bool: # This is only called on `numpy` inputs, so it's safe to import it here. import numpy as np try: return cast("bool", np.issubdtype(dtype, subtype)) except (NotImplementedError, TypeError): return False def sanitize_pandas_dataframe(df: _PandasDataFrameT) -> _PandasDataFrameT: # noqa: C901 """ Sanitize a DataFrame to prepare it for serialization. * Make a copy * Convert RangeIndex columns to strings * Raise ValueError if column names are not strings * Raise ValueError if it has a hierarchical index. * Convert categoricals to strings. * Convert np.bool_ dtypes to Python bool objects * Convert np.int dtypes to Python int objects * Convert floats to objects and replace NaNs/infs with None. * Convert DateTime dtypes into appropriate string representations * Convert Nullable integers to objects and replace NaN with None * Convert Nullable boolean to objects and replace NaN with None * convert dedicated string column to objects and replace NaN with None * Raise a ValueError for TimeDelta dtypes """ # This is safe to import here, as this function is only called on pandas input. # NumPy is a required dependency of pandas so is also safe to import. 
import numpy as np import pandas as pd df = cast("_PandasDataFrameT", df.copy()) if isinstance(df.columns, pd.RangeIndex): df.columns = df.columns.astype(str) for col_name in df.columns: if not isinstance(col_name, str): msg = ( f"Dataframe contains invalid column name: {col_name!r}. " "Column names must be strings" ) raise ValueError(msg) if isinstance(df.index, pd.MultiIndex): msg = "Hierarchical indices not supported" raise ValueError(msg) if isinstance(df.columns, pd.MultiIndex): msg = "Hierarchical indices not supported" raise ValueError(msg) def to_list_if_array(val): if isinstance(val, np.ndarray): return val.tolist() else: return val for dtype_item in df.dtypes.items(): # We know that the column names are strings from the isinstance check # further above but mypy thinks it is of type Hashable and therefore does not # let us assign it to the col_name variable which is already of type str. col_name = cast("str", dtype_item[0]) dtype = dtype_item[1] dtype_name = str(dtype) if dtype_name == "category": # Work around bug in to_json for categorical types in older versions # of pandas as they do not properly convert NaN values to null in to_json. # We can probably remove this part once we require pandas >= 1.0 col = df[col_name].astype(object) df[col_name] = col.where(col.notnull(), None) elif dtype_name in ("string", "str"): # dedicated string datatype (since 1.0) # https://pandas.pydata.org/pandas-docs/version/1.0.0/whatsnew/v1.0.0.html#dedicated-string-data-type col = df[col_name].astype(object) df[col_name] = col.where(col.notnull(), None) elif dtype_name == "bool": # convert numpy bools to objects; np.bool is not JSON serializable df[col_name] = df[col_name].astype(object) elif dtype_name == "boolean": # dedicated boolean datatype (since 1.0) # https://pandas.io/docs/user_guide/boolean.html col = df[col_name].astype(object) df[col_name] = col.where(col.notnull(), None) elif dtype_name.startswith(("datetime", "timestamp")): # Convert datetimes to strings. 
This needs to be a full ISO string # with time, which is why we cannot use ``col.astype(str)``. # This is because Javascript parses date-only times in UTC, but # parses full ISO-8601 dates as local time, and dates in Vega and # Vega-Lite are displayed in local time by default. # (see https://github.com/vega/altair/issues/1027) df[col_name] = ( df[col_name].apply(lambda x: x.isoformat()).replace("NaT", "") ) elif dtype_name.startswith("timedelta"): msg = ( f'Field "{col_name}" has type "{dtype}" which is ' "not supported by Altair. Please convert to " "either a timestamp or a numerical value." "" ) raise ValueError(msg) elif dtype_name.startswith("geometry"): # geopandas >=0.6.1 uses the dtype geometry. Continue here # otherwise it will give an error on np.issubdtype(dtype, np.integer) continue elif ( dtype_name in { "Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64", "Float32", "Float64", } ): # nullable integer datatypes (since 24.0) and nullable float datatypes (since 1.2.0) # https://pandas.pydata.org/pandas-docs/version/0.25/whatsnew/v0.24.0.html#optional-integer-na-support col = df[col_name].astype(object) df[col_name] = col.where(col.notnull(), None) elif numpy_is_subtype(dtype, np.integer): # convert integers to objects; np.int is not JSON serializable df[col_name] = df[col_name].astype(object) elif numpy_is_subtype(dtype, np.floating): # For floats, convert to Python float: np.float is not JSON serializable # Also convert NaN/inf values to null, as they are not JSON serializable col = df[col_name] bad_values = col.isnull() | np.isinf(col) df[col_name] = col.astype(object).where(~bad_values, None) elif dtype == object: # noqa: E721 # Convert numpy arrays saved as objects to lists # Arrays are not JSON serializable col = df[col_name].astype(object).apply(to_list_if_array) df[col_name] = col.where(col.notnull(), None) return df def sanitize_narwhals_dataframe( data: nw.DataFrame[TIntoDataFrame], ) -> nw.DataFrame[TIntoDataFrame]: 
"""Sanitize narwhals.DataFrame for JSON serialization.""" schema = data.schema columns: list[IntoExpr] = [] # See https://github.com/vega/altair/issues/1027 for why this is necessary. local_iso_fmt_string = "%Y-%m-%dT%H:%M:%S" is_polars = is_polars_dataframe(data.to_native()) for name, dtype in schema.items(): if dtype == nw.Date and is_polars: # Polars doesn't allow formatting `Date` with time directives. # The date -> datetime cast is extremely fast compared with `to_string` columns.append( nw.col(name).cast(nw.Datetime).dt.to_string(local_iso_fmt_string) ) elif dtype == nw.Date: columns.append(nw.col(name).dt.to_string(local_iso_fmt_string)) elif dtype == nw.Datetime: # Preserve timezone information when present so Vega-Lite can disambiguate # repeated local times during DST transitions. fmt = f"{local_iso_fmt_string}%.f" if getattr(dtype, "time_zone", None) is not None: fmt = f"{fmt}%z" columns.append(nw.col(name).dt.to_string(fmt)) elif dtype == nw.Duration: msg = ( f'Field "{name}" has type "{dtype}" which is ' "not supported by Altair. Please convert to " "either a timestamp or a numerical value." "" ) raise ValueError(msg) else: columns.append(name) return data.select(columns) def to_eager_narwhals_dataframe(data: IntoDataFrame) -> nw.DataFrame[Any]: """ Wrap `data` in `narwhals.DataFrame`. If `data` is not supported by Narwhals, but it is convertible to a PyArrow table, then first convert to a PyArrow Table, and then wrap in `narwhals.DataFrame`. """ data_nw = nw.from_native(data, eager_or_interchange_only=True) if nw.get_level(data_nw) == "interchange": # If Narwhals' support for `data`'s class is only metadata-level, then we # use the interchange protocol to convert to a PyArrow Table. 
from altair.utils.data import arrow_table_from_dfi_dataframe pa_table = arrow_table_from_dfi_dataframe(data) # type: ignore[arg-type] data_nw = nw.from_native(pa_table, eager_only=True) return data_nw def parse_shorthand( # noqa: C901 shorthand: dict[str, Any] | str, data: IntoDataFrame | None = None, parse_aggregates: bool = True, parse_window_ops: bool = False, parse_timeunits: bool = True, parse_types: bool = True, ) -> dict[str, Any]: """ General tool to parse shorthand values. These are of the form: - "col_name" - "col_name:O" - "average(col_name)" - "average(col_name):O" Optionally, a dataframe may be supplied, from which the type will be inferred if not specified in the shorthand. Parameters ---------- shorthand : dict or string The shorthand representation to be parsed data : DataFrame, optional If specified and of type DataFrame, then use these values to infer the column type if not provided by the shorthand. parse_aggregates : boolean If True (default), then parse aggregate functions within the shorthand. parse_window_ops : boolean If True then parse window operations within the shorthand (default:False) parse_timeunits : boolean If True (default), then parse timeUnits from within the shorthand parse_types : boolean If True (default), then parse typecodes within the shorthand Returns ------- attrs : dict a dictionary of attributes extracted from the shorthand Examples -------- >>> import pandas as pd >>> data = pd.DataFrame({"foo": ["A", "B", "A", "B"], "bar": [1, 2, 3, 4]}) >>> parse_shorthand("name") == {"field": "name"} True >>> parse_shorthand("name:Q") == {"field": "name", "type": "quantitative"} True >>> parse_shorthand("average(col)") == {"aggregate": "average", "field": "col"} True >>> parse_shorthand("foo:O") == {"field": "foo", "type": "ordinal"} True >>> parse_shorthand("min(foo):Q") == { ... "aggregate": "min", ... "field": "foo", ... "type": "quantitative", ... } True >>> parse_shorthand("month(col)") == { ... "field": "col", ... 
"timeUnit": "month", ... "type": "temporal", ... } True >>> parse_shorthand("year(col):O") == { ... "field": "col", ... "timeUnit": "year", ... "type": "ordinal", ... } True >>> parse_shorthand("foo", data) == {"field": "foo", "type": "nominal"} True >>> parse_shorthand("bar", data) == {"field": "bar", "type": "quantitative"} True >>> parse_shorthand("bar:O", data) == {"field": "bar", "type": "ordinal"} True >>> parse_shorthand("sum(bar)", data) == { ... "aggregate": "sum", ... "field": "bar", ... "type": "quantitative", ... } True >>> parse_shorthand("count()", data) == { ... "aggregate": "count", ... "type": "quantitative", ... } True """ from altair.utils.data import is_data_type if not shorthand: return {} patterns = [] if parse_aggregates: patterns.extend([r"{agg_count}\(\)"]) patterns.extend([r"{aggregate}\({field}\)"]) if parse_window_ops: patterns.extend([r"{op_count}\(\)"]) patterns.extend([r"{window_op}\({field}\)"]) if parse_timeunits: patterns.extend([r"{timeUnit}\({field}\)"]) patterns.extend([r"{field}"]) if parse_types: patterns = list(itertools.chain(*((p + ":{type}", p) for p in patterns))) regexps = ( re.compile(r"\A" + p.format(**SHORTHAND_UNITS) + r"\Z", re.DOTALL) for p in patterns ) # find matches depending on valid fields passed if isinstance(shorthand, dict): attrs = shorthand else: attrs = next( exp.match(shorthand).groupdict() # type: ignore[union-attr] for exp in regexps if exp.match(shorthand) is not None ) # Handle short form of the type expression if "type" in attrs: attrs["type"] = INV_TYPECODE_MAP.get(attrs["type"], attrs["type"]) # counts are quantitative by default if attrs == {"aggregate": "count"}: attrs["type"] = "quantitative" # times are temporal by default if "timeUnit" in attrs and "type" not in attrs: attrs["type"] = "temporal" # if data is specified and type is not, infer type from data if "type" not in attrs and is_data_type(data): unescaped_field = attrs["field"].replace("\\", "") data_nw = nw.from_native(data, 
eager_or_interchange_only=True) schema = data_nw.schema if unescaped_field in schema: column = data_nw[unescaped_field] if schema[unescaped_field] in { nw.Object, nw.Unknown, } and is_pandas_dataframe(data_nw.to_native()): attrs["type"] = infer_vegalite_type_for_pandas(column.to_native()) else: attrs["type"] = infer_vegalite_type_for_narwhals(column) if isinstance(attrs["type"], tuple): attrs["sort"] = attrs["type"][1] attrs["type"] = attrs["type"][0] # If an unescaped colon is still present, it's often due to an incorrect data type specification # but could also be due to using a column name with ":" in it. if ( "field" in attrs and ":" in attrs["field"] and attrs["field"][attrs["field"].rfind(":") - 1] != "\\" ): raise ValueError( '"{}" '.format(attrs["field"].split(":")[-1]) + "is not one of the valid encoding data types: {}.".format( ", ".join(TYPECODE_MAP.values()) ) + "\nFor more details, see https://altair-viz.github.io/user_guide/encodings/index.html#encoding-data-types. " + "If you are trying to use a column name that contains a colon, " + 'prefix it with a backslash; for example "column\\:name" instead of "column:name".' ) return attrs def infer_vegalite_type_for_narwhals( column: nw.Series, ) -> InferredVegaLiteType | tuple[InferredVegaLiteType, list]: dtype = column.dtype if ( nw.is_ordered_categorical(column) and not (categories := column.cat.get_categories()).is_empty() ): return "ordinal", categories.to_list() if dtype == nw.String or dtype == nw.Categorical or dtype == nw.Boolean: # noqa: PLR1714 return "nominal" elif dtype.is_numeric(): return "quantitative" elif dtype == nw.Datetime or dtype == nw.Date: # noqa: PLR1714 # We use `== nw.Datetime` to check for any kind of Datetime, regardless of time # unit and time zone. Prefer this over `dtype in {nw.Datetime, nw.Date}`, # see https://narwhals-dev.github.io/narwhals/backcompat. 
return "temporal" else: msg = f"Unexpected DtypeKind: {dtype}" raise ValueError(msg) def _wrap_and_copy_doc(tp: Callable[..., Any], cb: Callable[..., Any]) -> None: """ Raises when no doc was found. Notes ----- - Reference to ``tp`` is stored in ``cb.__wrapped__``. - The doc for ``cb`` will have a ``.rst`` link added, referring to ``tp``. """ cb.__wrapped__ = getattr(tp, "__init__", tp) # type: ignore[attr-defined] if doc_in := tp.__doc__: line_1 = f"{cb.__doc__ or f'Refer to :class:`{tp.__name__}`'}\n" cb.__doc__ = "".join((line_1, *doc_in.splitlines(keepends=True)[1:])) else: msg = f"Found no doc for {tp!r}" raise AttributeError(msg) class _MethodSignatureCopier(Protocol[P]): def __call__(self, cb: WrapsMethod[T, R], /) -> WrappedMethod[T, P, R]: ... def use_signature(tp: Callable[P, Any], /) -> _MethodSignatureCopier[P]: """ Use the signature and doc of ``tp`` for the decorated method ``cb``. Returns ------- A decorator that copies the doc and static typing signature from ``tp`` to ``cb``. """ def decorate(cb: WrapsMethod[T, R], /) -> WrappedMethod[T, P, R]: _wrap_and_copy_doc(tp, cb) return cb return decorate class _FunctionSignatureCopier(Protocol[P]): def __call__(self, cb: Callable[..., R], /) -> Callable[P, R]: ... def use_signature_func(tp: Callable[P, Any], /) -> _FunctionSignatureCopier[P]: """ Use the signature and doc of ``tp`` for the decorated function ``cb``. Returns ------- A decorator that copies the doc and static typing signature from ``tp`` to ``cb``. """ def decorate(fn: Callable[..., R], /) -> Callable[P, R]: _wrap_and_copy_doc(tp, fn) return fn return decorate @overload def update_nested( original: MutableMapping[Any, Any], update: Mapping[Any, Any], copy: Literal[False] = ..., ) -> MutableMapping[Any, Any]: ... @overload def update_nested( original: Mapping[Any, Any], update: Mapping[Any, Any], copy: Literal[True], ) -> MutableMapping[Any, Any]: ... 
def update_nested(
    original: Any,
    update: Mapping[Any, Any],
    copy: bool = False,
) -> MutableMapping[Any, Any]:
    """
    Recursively merge ``update`` into ``original``.

    Parameters
    ----------
    original : MutableMapping
        The (nested) dictionary receiving the updates. Modified in-place
        unless ``copy`` is true.
    update : Mapping
        The (nested) dictionary of updates.
    copy : bool, default False
        When True, a deep copy of ``original`` is updated and returned instead.

    Returns
    -------
    MutableMapping
        The updated mapping: ``original`` itself, or its deep copy.

    Notes
    -----
    A mapping in ``update`` is merged into the existing value only when that
    existing value is itself a ``MutableMapping``; any other existing value is
    replaced by the update's mapping object as-is.

    Examples
    --------
    >>> original = {"x": {"b": 2, "c": 4}}
    >>> update = {"x": {"b": 5, "d": 6}, "y": 40}
    >>> update_nested(original, update)  # doctest: +SKIP
    {'x': {'b': 5, 'c': 4, 'd': 6}, 'y': 40}
    >>> original  # doctest: +SKIP
    {'x': {'b': 5, 'c': 4, 'd': 6}, 'y': 40}
    """
    target = deepcopy(original) if copy else original
    for key, incoming in update.items():
        if not isinstance(incoming, Mapping):
            # Leaf value: plain overwrite.
            target[key] = incoming
            continue
        # A missing key starts from a fresh dict, so the result never aliases
        # the update's own nested mapping in that case.
        current = target.get(key, {})
        target[key] = (
            update_nested(current, incoming)
            if isinstance(current, MutableMapping)
            else incoming
        )
    return target
class _ChannelCache:
    """
    Lookup tables between channel classes and encoding names.

    Obtain the shared instance through ``from_cache``; it is built once and
    stored in the module-level ``_CHANNEL_CACHE``.
    """

    # channel class -> encoding name
    channel_to_name: dict[type[SchemaBase], str]
    # encoding name -> {"field" | "value" -> channel class}
    name_to_channel: dict[str, dict[_ChannelType, type[SchemaBase]]]

    @classmethod
    def from_cache(cls) -> _ChannelCache:
        """Return the shared instance, constructing it on first use."""
        global _CHANNEL_CACHE
        try:
            cached = _CHANNEL_CACHE
        except NameError:
            # First use: the global has not been assigned yet, so build both
            # tables and publish the instance for later calls.
            cached = cls.__new__(cls)
            cached.channel_to_name = _init_channel_to_name()  # pyright: ignore[reportAttributeAccessIssue]
            cached.name_to_channel = _invert_group_channels(cached.channel_to_name)
            _CHANNEL_CACHE = cached
        return _CHANNEL_CACHE

    def get_encoding(self, tp: type[Any], /) -> str:
        """
        Return the encoding name registered for the channel class ``tp``.

        Raises
        ------
        NotImplementedError
            When ``tp`` is not a known channel class. The message names ``tp``.
        """
        if encoding := self.channel_to_name.get(tp):
            return encoding
        # ``tp`` is already a type: report *its* name rather than the name of
        # its metaclass (which would always read 'type').
        tp_name = getattr(tp, "__name__", type(tp).__name__)
        msg = f"positional of type {tp_name!r}"
        raise NotImplementedError(msg)

    def _wrap_in_channel(self, obj: Any, encoding: str, /):
        """
        Convert ``obj`` into the channel class registered for ``encoding``.

        ``SchemaBase`` instances are returned untouched, strings are treated as
        shorthand, lists/tuples are converted element-wise, and ``SchemaLike``
        objects are converted via ``to_dict()`` first. When ``encoding`` is
        unknown a warning is emitted and the (possibly dict-converted) object
        is returned as-is.
        """
        if isinstance(obj, SchemaBase):
            return obj
        elif isinstance(obj, str):
            obj = {"shorthand": obj}
        elif isinstance(obj, (list, tuple)):
            return [self._wrap_in_channel(el, encoding) for el in obj]
        elif isinstance(obj, SchemaLike):
            obj = obj.to_dict()
        if channel := self.name_to_channel.get(encoding):
            tp = channel["value" if "value" in obj else "field"]
            try:
                # Don't force validation here; some objects won't be valid until
                # they're created in the context of a chart.
                return tp.from_dict(obj, validate=False)
            except jsonschema.ValidationError:
                # our attempts at finding the correct class have failed
                return obj
        else:
            warnings.warn(f"Unrecognized encoding channel {encoding!r}", stacklevel=1)
            return obj

    def infer_encoding_types(self, kwargs: dict[str, Any], /):
        """Wrap every defined value of ``kwargs`` in its channel class."""
        return {
            encoding: self._wrap_in_channel(obj, encoding)
            for encoding, obj in kwargs.items()
            if obj is not Undefined
        }
def _invert_group_channels(
    m: dict[type[SchemaBase], str], /
) -> dict[str, dict[_ChannelType, type[SchemaBase]]]:
    """
    Grouped inverted index for `_ChannelCache.channel_to_name`.

    Parameters
    ----------
    m
        Mapping of channel class to encoding name.

    Returns
    -------
    dict
        One entry per encoding name, in first-seen order. Each entry is a
        0-2 item dict keyed by ``"field"`` and/or ``"value"``.
        ``"datum"`` is never included, as it is never utilized in
        ``wrap_in_channel``.

    Notes
    -----
    Entries are accumulated per encoding name, so classes that share an
    encoding do not have to be adjacent in ``m``. (Grouping with
    ``itertools.groupby`` would only merge *consecutive* items and silently
    drop earlier classes of a split group.)
    """
    inverted: dict[str, dict[Any, type[SchemaBase]]] = {}
    for tp, encoding in m.items():
        # Register the encoding up front so it still gets an (empty) entry
        # when only a ``*Datum`` class maps to it.
        group = inverted.setdefault(encoding, {})
        name = tp.__name__
        if name.endswith("Datum"):
            continue
        group["value" if name.endswith("Value") else "field"] = tp
    return inverted
def is_data_type(obj: Any) -> TypeIs[DataType]:
    """
    Report whether ``obj`` is one of the supported ``DataType`` inputs.

    An object qualifies when it is a ``dict``, satisfies the runtime-checkable
    ``SupportsGeoInterface`` protocol, or is turned into a ``nw.DataFrame`` by
    ``nw.from_native``.
    """
    if isinstance(obj, (dict, SupportsGeoInterface)):
        return True
    # NOTE(review): with ``pass_through=True`` narwhals presumably hands back
    # unsupported objects unchanged rather than raising -- confirm.
    maybe_frame = nw.from_native(obj, eager_or_interchange_only=True, pass_through=True)
    return isinstance(maybe_frame, nw.DataFrame)
class MaxRowsError(Exception):
    """Raised when a data model has too many rows."""

    def __init__(self, message: str, /) -> None:
        # Keep the text reachable both as ``.message`` and through ``args``.
        self.message = message
        super().__init__(self.message)

    @classmethod
    def from_limit_rows(cls, user_rows: int, max_rows: int, /) -> MaxRowsError:
        """
        Build the error for a dataset of ``user_rows`` rows exceeding ``max_rows``.

        The message has three paragraphs: the row counts, a suggestion to enable
        the VegaFusion data transformer, and a link to the large-datasets guide.
        """
        too_many = (
            f"The number of rows in your dataset ({user_rows}) is greater "
            f"than the maximum allowed ({max_rows})."
        )
        remedy = "\n".join(
            (
                "Try enabling the VegaFusion data transformer which "
                "raises this limit by pre-evaluating data",
                "transformations in Python.",
                " >> import altair as alt",
                ' >> alt.data_transformers.enable("vegafusion")',
            )
        )
        further_reading = "\n".join(
            (
                "Or, see https://altair-viz.github.io/user_guide/large_datasets.html "
                "for additional information",
                "on how to plot large datasets.",
            )
        )
        return cls("\n\n".join((too_many, remedy, further_reading)))
def to_json(
    data: DataType | None = None,
    prefix: str = "altair-data",
    extension: str = "json",
    filename: str = "{prefix}-{hash}.{extension}",
    urlpath: str = "",
) -> partial | _ToFormatReturnUrlDict:
    """
    Write the data model to a .json file and return a url based data model.

    Called without ``data``, returns a ``partial`` of this function with the
    remaining options bound, so it can be applied to data later.
    """
    options = _to_text_kwds(prefix, extension, filename, urlpath)
    if data is not None:
        serialized = _data_to_json_string(data)
        return _to_text(serialized, **options, format=_FormatDict(type="json"))
    # No data yet: hand back a pre-configured transformer.
    return partial(to_json, **options)
def _to_text(
    data: str,
    prefix: str,
    extension: str,
    filename: str,
    urlpath: str,
    format: _FormatDict,
) -> _ToFormatReturnUrlDict:
    """
    Write ``data`` to a content-addressed file and describe it as a URL dataset.

    Parameters
    ----------
    data
        The already-serialized text to write.
    prefix, extension
        Values substituted into ``filename``.
    filename
        A ``str.format`` template receiving ``prefix``, ``hash`` and ``extension``.
    urlpath
        URL or path prefix under which the written file is reachable.
    format
        Format description stored alongside the URL.

    Returns
    -------
    A dict with the ``url`` of the written file and its ``format``.
    """
    # Local import: only this function needs URL-style joining.
    import posixpath

    data_hash = _compute_data_hash(data)
    filename = filename.format(prefix=prefix, hash=data_hash, extension=extension)
    Path(filename).write_text(data, encoding="utf-8")
    # Join as a URL, not as a filesystem path: ``Path`` would collapse the
    # ``//`` in prefixes such as ``https://host/dir`` and use backslashes on
    # Windows, both of which produce a broken URL.
    url = posixpath.join(urlpath, Path(filename).as_posix())
    return _ToFormatReturnUrlDict({"url": url, "format": format})
def _compute_data_hash(data_str: str) -> str:
    """Return the first 32 hex characters of the SHA-256 digest of ``data_str``."""
    digest = hashlib.sha256(data_str.encode())
    hex_digest = digest.hexdigest()
    return hex_digest[:32]
def _data_to_csv_string(data: DataType) -> str:
    """
    Return a CSV string representation of the input data.

    Raises
    ------
    NotImplementedError
        For ``__geo_interface__`` data, or for data that narwhals cannot
        treat as an eager DataFrame.
    KeyError
        For a dict without a ``"values"`` entry.
    ImportError
        For a dict when ``pandas`` is not installed.
    """
    check_data_type(data)
    if isinstance(data, SupportsGeoInterface):
        # Name the type of the offending object itself; ``type(SupportsGeoInterface)``
        # would be the protocol's metaclass, which tells the user nothing.
        msg = (
            f"to_csv does not yet work with data that "
            f"is of type {type(data).__name__!r}.\n"
            f"See https://github.com/vega/altair/issues/3441"
        )
        raise NotImplementedError(msg)
    elif is_pandas_dataframe(data):
        data = sanitize_pandas_dataframe(data)
        return data.to_csv(index=False)
    elif isinstance(data, dict):
        if "values" not in data:
            msg = "values expected in data dict, but not present"
            raise KeyError(msg)
        # pandas is imported lazily: it is only needed for the dict case.
        try:
            import pandas as pd
        except ImportError as exc:
            msg = "pandas is required to convert a dict to a CSV string"
            raise ImportError(msg) from exc
        return pd.DataFrame.from_dict(data["values"]).to_csv(index=False)
    try:
        data_nw = nw.from_native(data, eager_only=True)
    except TypeError as exc:
        msg = "to_csv only works with data expressed as a DataFrame or as a dict"
        raise NotImplementedError(msg) from exc
    return data_nw.write_csv()
def _format_message(
    version: LiteralString,
    alternative: LiteralString | None,
    message: LiteralString | None,
    /,
) -> LiteralString:
    """
    Compose the text of a deprecation warning.

    The result always starts with a newline and the ``altair`` version, then
    optionally names the ``alternative`` on the same line, then optionally
    adds ``message`` on a line of its own.
    """
    parts = [f"\nDeprecated since `altair={version}`."]
    if alternative:
        parts.append(f" Use {alternative} instead.")
    if message:
        parts.append(f"\n{message}")
    return "".join(parts)
def deprecated_warn(
    message: LiteralString,
    *,
    version: LiteralString,
    alternative: LiteralString | None = None,
    category: type[AltairDeprecationWarning] = AltairDeprecationWarning,
    stacklevel: int = 2,
    action: Literal["once"] | None = None,
) -> None:
    """
    Indicate that the current code path is deprecated.

    This should be used for non-trivial cases *only*. ``@deprecated`` should
    always be preferred as it is recognized by static type checkers.

    Parameters
    ----------
    message
        Explanation of the deprecated behaviour.

        .. note::
            Unlike ``@deprecated``, this is *not* optional.
    version
        ``altair`` version the deprecation first appeared.
    alternative
        Suggested replacement argument/method/function.
    category
        The runtime warning type emitted.
    stacklevel
        How far up the call stack to make this warning appear.
        A value of ``2`` attributes the warning to the caller
        of the code calling ``deprecated_warn()``.
    action
        ``None`` (the default) warns on every call.
        ``"once"`` warns only the first time a given formatted message is seen.

    Raises
    ------
    NotImplementedError
        When ``action`` is neither ``None`` nor ``"once"``.

    References
    ----------
    [warnings.warn](https://docs.python.org/3/library/warnings.html#warnings.warn)
    """
    text = _format_message(version, alternative, message)
    if action == "once":
        # ``_warn_once`` compensates for its own extra stack frame.
        _warn_once(text, category=category, stacklevel=stacklevel)
    elif action is None:
        warnings.warn(text, category=category, stacklevel=stacklevel)
    else:
        raise NotImplementedError(action)
class _WarningsMonitor:
    """Lock-protected record of the warning messages that were already emitted."""

    def __init__(self) -> None:
        self._warned: dict[LiteralString, Literal[True]] = {}
        self._lock = threading.Lock()

    def __contains__(self, key: LiteralString, /) -> bool:
        with self._lock:
            return key in self._warned

    def hit(self, key: LiteralString, /) -> None:
        """Record ``key`` as seen."""
        with self._lock:
            self._warned[key] = True

    def hit_once(self, key: LiteralString, /) -> bool:
        """
        Record ``key`` and report whether this call was the first to do so.

        The membership test and the insertion happen under a single lock
        acquisition, so exactly one caller gets ``True`` for a given key
        (until ``clear`` is called).
        """
        with self._lock:
            if key in self._warned:
                return False
            self._warned[key] = True
            return True

    def clear(self) -> None:
        """Forget every recorded key."""
        with self._lock:
            self._warned.clear()


_warnings_monitor = _WarningsMonitor()


def _warn_once(
    msg: LiteralString, /, *, category: type[AltairDeprecationWarning], stacklevel: int
) -> None:
    """
    Emit ``msg`` as a warning the first time it is seen, and never again.

    The message is recorded *before* the warning is emitted. ``stacklevel`` is
    increased by one to skip this helper's own frame.
    """
    # A separate ``in`` check followed by ``hit`` would let two threads both
    # pass the check; ``hit_once`` decides atomically.
    if _warnings_monitor.hit_once(msg):
        warnings.warn(msg, category=category, stacklevel=stacklevel + 1)
as more specific in the values of the dictionaries DefaultRendererReturnType: TypeAlias = tuple[ dict[str, str | dict[str, Any]], dict[str, dict[str, Any]] ] class RendererRegistry(PluginRegistry[RendererType, MimeBundleType]): entrypoint_err_messages = { "notebook": textwrap.dedent( """ To use the 'notebook' renderer, you must install the vega package and the associated Jupyter extension. See https://altair-viz.github.io/getting_started/installation.html for more information. """ ), } def set_embed_options( self, defaultStyle: bool | str | None = None, renderer: str | None = None, width: int | None = None, height: int | None = None, padding: int | None = None, scaleFactor: float | None = None, actions: bool | dict[str, bool] | None = None, format_locale: str | dict | None = None, time_format_locale: str | dict | None = None, **kwargs, ) -> PluginEnabler: """ Set options for embeddings of Vega & Vega-Lite charts. Options are fully documented at https://github.com/vega/vega-embed. Similar to the `enable()` method, this can be used as either a persistent global switch, or as a temporary local setting using a context manager (i.e. a `with` statement). Parameters ---------- defaultStyle : bool or string Specify a default stylesheet for embed actions. renderer : string The renderer to use for the view. One of "canvas" (default) or "svg" width : integer The view width in pixels height : integer The view height in pixels padding : integer The view padding in pixels scaleFactor : number The number by which to multiply the width and height (default 1) of an exported PNG or SVG image. actions : bool or dict Determines if action links ("Export as PNG/SVG", "View Source", "View Vega" (only for Vega-Lite), "Open in Vega Editor") are included with the embedded view. If the value is true, all action links will be shown and none if the value is false. 
class Displayable:
    """
    A base display class for VegaLite v1/v2.

    This class takes a VegaLite v1/v2 spec and does the following:

    1. Optionally validates the spec against a schema.
    2. Uses the RendererPlugin to grab a renderer and call it when the
       IPython/Jupyter display method (_repr_mimebundle_) is called.

    The spec passed to this class must be fully schema compliant and already
    have the data portion of the spec fully processed and ready to serialize.
    In practice, this means, the data portion of the spec should have been passed
    through appropriate data model transformers.

    Subclasses are expected to set ``renderers`` and ``schema_path``.
    """

    renderers: RendererRegistry | None = None
    # ``(package, resource)`` pair handed to ``pkgutil.get_data``.
    schema_path = ("altair", "")

    def __init__(self, spec: dict[str, Any], validate: bool = False) -> None:
        """
        Store ``spec`` and, when ``validate`` is true, validate it immediately.

        Parameters
        ----------
        spec
            The chart specification to display.
        validate
            Whether to validate ``spec`` against the schema at ``schema_path``.
        """
        self.spec = spec
        self.validate = validate
        # Honour the flag: validation is documented as optional, and the
        # default ``schema_path`` of this base class names no schema resource.
        if self.validate:
            self._validate()

    def _validate(self) -> None:
        """Validate the spec against the schema."""
        data = pkgutil.get_data(*self.schema_path)
        assert data is not None
        schema_dict: dict[str, Any] = json.loads(data.decode("utf-8"))
        validate_jsonschema(
            self.spec,
            schema_dict,
        )

    def _repr_mimebundle_(
        self, include: Any = None, exclude: Any = None
    ) -> MimeBundleType:
        """
        Return a MIME bundle for display in Jupyter frontends.

        Returns an empty bundle when no renderer registry is attached.
        ``include`` and ``exclude`` are accepted but not used.
        """
        if self.renderers is not None:
            renderer_func = self.renderers.get()
            assert renderer_func is not None
            return renderer_func(self.spec)
        else:
            return {}
""" return default_renderer_base( spec, mime_type="application/json", str_repr=str_repr, **options ) class HTMLRenderer: """Object to render charts as HTML, with a unique output div each time.""" def __init__(self, output_div: str = "altair-viz-{}", **kwargs) -> None: self._output_div = output_div self.kwargs = kwargs @property def output_div(self) -> str: return self._output_div.format(uuid.uuid4().hex) def __call__(self, spec: dict[str, Any], **metadata) -> dict[str, str]: kwargs = self.kwargs.copy() kwargs.update(**metadata, output_div=self.output_div) return spec_to_mimebundle(spec, format="html", **kwargs) ================================================ FILE: altair/utils/execeval.py ================================================ from __future__ import annotations import ast import sys from typing import TYPE_CHECKING, Any, Literal, overload if TYPE_CHECKING: from collections.abc import Callable from os import PathLike if sys.version_info >= (3, 11): from typing import Self else: from typing_extensions import Self class _CatchDisplay: """Class to temporarily catch sys.displayhook.""" def __init__(self) -> None: self.output: Any | None = None def __enter__(self) -> Self: self.old_hook: Callable[[object], Any] = sys.displayhook sys.displayhook = self return self def __exit__(self, type, value, traceback) -> Literal[False]: sys.displayhook = self.old_hook # Returning False will cause exceptions to propagate return False def __call__(self, output: Any) -> None: self.output = output @overload def eval_block( code: str | Any, namespace: dict[str, Any] | None = ..., filename: str | bytes | PathLike[Any] = ..., *, strict: Literal[False] = ..., ) -> Any | None: ... @overload def eval_block( code: str | Any, namespace: dict[str, Any] | None = ..., filename: str | bytes | PathLike[Any] = ..., *, strict: Literal[True], ) -> Any: ... 
def eval_block(
    code: str | Any,
    namespace: dict[str, Any] | None = None,
    filename: str | bytes | PathLike[Any] = "",
    *,
    strict: bool = False,
) -> Any | None:
    """
    Execute a multi-line block of code in the given namespace.

    If the final statement in the code is an expression, return
    the result of the expression.

    If ``strict``, raise a ``TypeError`` when the return value would be ``None``.

    Parameters
    ----------
    code
        Source to execute; passed directly to ``ast.parse``.
    namespace
        Globals dict the code is executed in. A fresh dict is used when ``None``.
    filename
        Filename handed to ``compile`` for each statement.
    strict
        When true, a ``None`` result raises instead of being returned.

    Returns
    -------
    The value of the trailing expression statement, or ``None`` when the code
    is empty or does not end in an expression (non-strict mode only).

    Raises
    ------
    TypeError
        If ``strict`` is true and the result would be ``None``.
    """
    tree = ast.parse(code, filename="", mode="exec")
    if namespace is None:
        namespace = {}
    catch_display = _CatchDisplay()

    # Empty, blank or comment-only code parses to an empty body; guard the
    # index so that case yields "nothing to evaluate" instead of IndexError.
    if tree.body and isinstance(tree.body[-1], ast.Expr):
        to_exec, to_eval = tree.body[:-1], tree.body[-1:]
    else:
        to_exec, to_eval = tree.body, []

    for node in to_exec:
        compiled = compile(ast.Module([node], []), filename=filename, mode="exec")
        exec(compiled, namespace)

    # The trailing expression is compiled in "single" mode so that its value is
    # handed to ``sys.displayhook``, which ``catch_display`` replaces for the
    # duration of the ``with`` block in order to record it.
    with catch_display:
        for node in to_eval:
            compiled = compile(
                ast.Interactive([node]), filename=filename, mode="single"
            )
            exec(compiled, namespace)

    output = catch_display.output
    if strict and output is None:
        msg = f"Expected a non-None value but got {output!r}"
        raise TypeError(msg)
    return output
{%- if fullhtml %} {%- endif %} """ ) HTML_TEMPLATE_UNIVERSAL = jinja2.Template( """
""" ) # This is like the HTML_TEMPLATE template, but includes vega javascript inline # so that the resulting file is not dependent on external resources. This was # ported over from altair_saver. # # implies requirejs=False and full_html=True INLINE_HTML_TEMPLATE = jinja2.Template( """\
""" ) HTML_TEMPLATE_OLLI = jinja2.Template( """
""" ) TEMPLATES: dict[TemplateName, jinja2.Template] = { "standard": HTML_TEMPLATE, "universal": HTML_TEMPLATE_UNIVERSAL, "inline": INLINE_HTML_TEMPLATE, "olli": HTML_TEMPLATE_OLLI, } def spec_to_html( spec: dict[str, Any], mode: RenderMode, vega_version: str | None, vegaembed_version: str | None, vegalite_version: str | None = None, base_url: str = "https://cdn.jsdelivr.net/npm", output_div: str = "vis", embed_options: dict[str, Any] | None = None, json_kwds: dict[str, Any] | None = None, fullhtml: bool = True, requirejs: bool = False, template: jinja2.Template | TemplateName = "standard", ) -> str: """ Embed a Vega/Vega-Lite spec into an HTML page. Parameters ---------- spec : dict a dictionary representing a vega-lite plot spec. mode : string {'vega' | 'vega-lite'} The rendering mode. This value is overridden by embed_options['mode'], if it is present. vega_version : string For html output, the version of vega.js to use. vegalite_version : string For html output, the version of vegalite.js to use. vegaembed_version : string For html output, the version of vegaembed.js to use. base_url : string (optional) The base url from which to load the javascript libraries. output_div : string (optional) The id of the div element where the plot will be shown. embed_options : dict (optional) Dictionary of options to pass to the vega-embed script. Default entry is {'mode': mode}. json_kwds : dict (optional) Dictionary of keywords to pass to json.dumps(). fullhtml : boolean (optional) If True (default) then return a full html page. If False, then return an HTML snippet that can be embedded into an HTML page. requirejs : boolean (optional) If False (default) then load libraries from base_url using
================================================ FILE: doc/_static/custom.css ================================================ html[data-theme="light"] { --pst-color-primary: rgb(93, 154, 181); --pst-color-secondary: rgb(93, 154, 181); --pst-color-secondary-bg: rgb(223, 235, 240); } html[data-theme="dark"] { --pst-color-primary: rgb(93, 155, 182); --pst-color-secondary: rgb(93, 155, 182); --pst-color-secondary-bg: rgb(33, 45, 51); } .wy-nav-side p.caption { color: #F5F5F5; } div.wy-side-nav-search { background: #757575; } div.wy-side-nay { background: #212121; } table.field-list td li { line-height: 18px; } table.docutils td p { font-size: 14px !important; margin-bottom: 6px; } table.docutils td li { line-height: 18px; } table td.vl-type-def { max-width: 170px; overflow-x: clip; } /* Hide this empty container as it leads to a vertical scrollbar in the primary sidebar even if there is no need for such a scrollbar as all content would fit onto the screen */ #rtd-footer-container { display: none; } /* Default for the pydata theme is 25% */ .bd-sidebar-primary { max-width: 20% } /* By providing max-width above for .bd-sidebar-primary, we also overwrite the setting from the template for small screens, e.g. mobile phones. The values below are copied out of pydata-sphinx-theme.css so that the sidebar is again properly displayed on mobile devices and not restricted to 20% */ @media (max-width: 959.98px) { .bd-sidebar-primary { max-width: 350px; } } .properties-example .vega-bind-name { display: inline-block; min-width: 150px; } .properties-example .vega-bindings { padding-left: 20px; padding-bottom: 10px; } .properties-example .vega-bindings select { max-width: 180px; } .properties-example .vega-bindings input { vertical-align: text-bottom; margin-right: 3px; } .full-width-plot { width: 100%; } /* This hides the Ctrl + K from the search box on the start page * to make it less distracting on the home page. 
* The shortcut still shows up when clicking the search box */ .search-button-field > .search-button__kbd-shortcut { display: none; } /* Configurations for the start page ------------------------------------ */ .lead { font-size: 1.3em; font-weight: 300; margin-top: 22px; margin-bottom: 22px; /* This pushes down the lead so that it is not rendered on top of the gallery (showcase) which has an absolute position. The value is calculated as height (showcase) + margin-bottom (showcase) + margin-top (lead) */ padding-top: 332px; } .lead strong { /* Default is bolder which is less */ font-weight: bold; } /* ---------------------------------- */ ================================================ FILE: doc/_static/theme_overrides.css ================================================ /* override table width restrictions */ @media screen and (min-width: 767px) { .wy-table-responsive table td { /* !important prevents the common CSS stylesheets from overriding this as on RTD they are loaded after this stylesheet */ white-space: normal !important; } .wy-table-responsive { overflow: visible !important; } } .rst-content dl:not(.docutils) dt em { font-style: normal !important; line-height: 1.4em !important; } .rst-content div[class^='highlight'] { background: #fff !important; } img.logo { width: 120px !important; } /* Increase max-width of the content area slightly to accommodate larger screens */ .bd-main .bd-content .bd-article-container { max-width: 1000px; } ================================================ FILE: doc/_templates/class.rst ================================================ :mod:`{{module}}`.{{objname}} {{ underline }}============== .. currentmodule:: {{ module }} .. autoclass:: {{ objname }} {% block methods %} .. automethod:: __init__ {% endblock %} .. raw:: html
================================================ FILE: doc/_templates/navbar-project.html ================================================

{{ project }}

================================================ FILE: doc/_templates/sidebar-logo.html ================================================ ================================================ FILE: doc/about/citing.rst ================================================ Citing =============== Vega-Altair ----------- If you use Vega-Altair in academic work, please consider citing `Altair: Interactive Statistical Visualizations for Python `_ as .. code-block:: @article{VanderPlas2018, doi = {10.21105/joss.01057}, url = {https://doi.org/10.21105/joss.01057}, year = {2018}, publisher = {The Open Journal}, volume = {3}, number = {32}, pages = {1057}, author = {Jacob VanderPlas and Brian Granger and Jeffrey Heer and Dominik Moritz and Kanit Wongsuphasawat and Arvind Satyanarayan and Eitan Lees and Ilia Timofeev and Ben Welsh and Scott Sievert}, title = {Altair: Interactive Statistical Visualizations for Python}, journal = {Journal of Open Source Software} } Vega-Lite --------- Please additionally consider citing the `Vega-Lite `_ project, which Vega-Altair is based on: `Vega-Lite: A Grammar of Interactive Graphics `_ .. code-block:: @article{Satyanarayan2017, author={Satyanarayan, Arvind and Moritz, Dominik and Wongsuphasawat, Kanit and Heer, Jeffrey}, title={Vega-Lite: A Grammar of Interactive Graphics}, journal={IEEE transactions on visualization and computer graphics}, year={2017}, volume={23}, number={1}, pages={341-350}, publisher={IEEE} } ================================================ FILE: doc/about/code_of_conduct.rst ================================================ Code of Conduct =============== As a project of the Vega Organization, we use the `Vega Code of Conduct `_. ================================================ FILE: doc/about/governance.rst ================================================ Governance ========== Vega-Altair's governance structure is based on GitHub's `Minimum Viable Governance `_ (MVG) template. 
Organizational Governance ------------------------- The Altair-Viz organization is governed by the documents that reside in the `Vega Organizational GitHub repository `_. Project Governance ------------------ The Vega-Altair library is governed by the documents that reside in the `project-docs `_ directory of the Vega Organizational GitHub repository. ================================================ FILE: doc/about/roadmap.rst ================================================ Roadmap ======= The roadmap for Vega-Altair and related projects can be found in `this project board `_. .. toctree:: :maxdepth: 1 :caption: About :hidden: self code_of_conduct governance citing versioning ================================================ FILE: doc/about/versioning.rst ================================================ Versioning ========== Vega-Altair has historically released major versions that coincide with those of Vega-Lite_. As the projects have matured, and major versions become less frequent, there has been a growing need to introduce breaking changes between these major versions. Such changes would allow Vega-Altair to address technical debt and improve upon API ergonomics. To ensure future releases clearly communicate changes, Vega-Altair will be working towards adopting SemVer_. Public API ---------- Functionality documented in :ref:`api` defines the Vega-Altair public API. Version numbers --------------- A Vega-Altair release number is composed of ``MAJOR.MINOR.PATCH``. * Backward incompatible API changes increment **MAJOR** version (``4.2.2`` - ``5.0.0``) * New backward compatible functionality increment **MINOR** version (``5.2.0`` - ``5.3.0``) * Backward compatible bug fixes increment **PATCH** version (``5.1.1`` - ``5.1.2``) **MAJOR** versions will *likely* continue to increase with a **MAJOR** increment to Vega-Lite_. 
Deprecation ----------- Deprecation warnings may be introduced in **MAJOR** and **MINOR** versions, but the removal of deprecated functionality will not occur until *at least* the next **MAJOR** version. For upstream breaking changes that trigger a **MAJOR** version, we *may* provide a deprecation warning if we consider the change especially disruptive. Starting in version ``5.4.0``, all deprecation warnings *must* specify: * the version number in which they were introduced Where possible, deprecation warnings *may* specify: * an alternative function/method/parameter/class to use instead * an explanation for why this change had to be made Deprecated functionality *may* be removed from the Vega-Altair documentation, if there is a suitable replacement and we believe inclusion of both could confuse new users. .. _Vega-Lite: https://github.com/vega/vega-lite .. _SemVer: https://semver.org/ ================================================ FILE: doc/case_studies/exploring-weather.rst ================================================ .. _exploring-weather: Exploring Seattle Weather ------------------------- (This tutorial is adapted from `Vega-Lite's documentation `_) In this tutorial, you’ll learn a few more techniques for creating visualizations in Altair. If you are not familiar with Altair, please read :ref:`starting` first. For this tutorial, we will create visualizations to explore weather data for Seattle, taken from NOAA. The dataset is a CSV file with columns for the temperature (in Celsius), precipitation (in millimeters), wind speed (in meter/second), and weather type. We have one row for each day from January 1st, 2012 to December 31st, 2015. Altair is designed to work with data in the form of pandas_ dataframes, and contains a loader for this and other built-in datasets: ..
altair-plot:: :output: repr from altair.datasets import data df = data.seattle_weather() df.head() The data is loaded from the web and stored in a pandas DataFrame, and from here we can explore it with Altair. Let’s start by looking at the precipitation, using tick marks to see the distribution of precipitation values: .. altair-plot:: import altair as alt alt.Chart(df).mark_tick().encode( x='precipitation', ) It looks as though precipitation is skewed towards lower values; that is, when it rains in Seattle, it usually doesn’t rain very much. It is difficult to see patterns across continuous variables, and so to better see this, we can create a histogram of the precipitation data. For this we first discretize the precipitation values by adding a binning to ``x``. Additionally, we set our encoding channel ``y`` with ``count``. The result is a histogram of precipitation values: .. altair-plot:: alt.Chart(df).mark_bar().encode( alt.X('precipitation').bin(), y='count()' ) Next, let’s look at how precipitation in Seattle changes throughout the year. Altair natively supports dates and discretization of dates when we set the type to ``temporal`` (shorthand ``T``). For example, in the following plot, we compute the total precipitation for each month. To discretize the data into months, we can use a ``month`` binning (see :ref:`user-guide-timeunit-transform` for more information about this and other ``timeUnit`` binnings): .. altair-plot:: alt.Chart(df).mark_line().encode( x='month(date):T', y='average(precipitation)' ) This chart shows that in Seattle the precipitation in the winter is, on average, much higher than summer (an unsurprising observation to those who live there!). By changing the mapping of encoding channels to data features, you can begin to explore the relationships within the data. When looking at precipitation and temperature, we might want to aggregate by year *and* month (``yearmonth``) rather than just month. 
This allows us to see seasonal trends, with daily variation smoothed out. We might also wish to see the maximum and minimum temperature in each month: .. altair-plot:: alt.Chart(df).mark_line().encode( x='yearmonth(date):T', y='max(temp_max)', ) In this chart, it looks as though the maximum temperature is increasing from year to year over the course of this relatively short baseline. To look closer into this, let’s instead look at the mean of the maximum daily temperatures for each year: .. altair-plot:: alt.Chart(df).mark_line().encode( x='year(date):T', y='mean(temp_max)', ) This can be a little clearer if we use a bar plot and mark the year as an "ordinal" (ordered category) type. For aesthetic reasons, let's make the bar chart horizontal by assigning the ordinal value to the y-axis: .. altair-plot:: alt.Chart(df).mark_bar().encode( x='mean(temp_max)', y='year(date):O' ) The chart indicates that the annual average of the daily high temperatures increased over the course of these four years, a fact that you can confirm for minimum daily temperatures as well. You might also wonder how the daily temperature range changes throughout the year. For this, we have to add a computation to derive a new field, which can be done by adding a ``calculate`` transform: .. altair-plot:: alt.Chart(df).mark_bar().encode( x='mean(temp_range):Q', y='year(date):O' ).transform_calculate( temp_range="datum.temp_max - datum.temp_min" ) Note that this calculation doesn't actually do any data manipulation in Python, but rather encodes and stores the operations within the plot specification, where they will be calculated by the renderer. Of course, the same calculation could be done by using pandas manipulations to explicitly add a column to the dataframe; the disadvantage there is that the derived values would have to be stored in the plot specification rather than computed on-demand in the browser. 
Next we will explore the ``weather`` field, which encodes a categorical variable describing the weather on a given day. We might wish to know how different kinds of weather (e.g. sunny days or rainy days) are distributed throughout the year. To answer this, we can discretize the date by month and then count the number of records on the y-Axis. We then break down the bars by the weather type by mapping this column to a color channel. When a bar chart has a field mapped to color, Altair will automatically stack the bars atop each other: .. altair-plot:: alt.Chart(df).mark_bar().encode( x='month(date):N', y='count()', color='weather', ) The default color palette’s semantics might not match our expectation. For example, we probably do not expect “sun” (sunny) to be purple. We can tune the chart by providing a color scale range that maps the values from the weather field to meaningful colors, using standard hex color codes: .. altair-plot:: :output: none scale = alt.Scale(domain=['sun', 'fog', 'drizzle', 'rain', 'snow'], range=['#e7ba52', '#c7c7c7', '#aec7e8', '#1f77b4', '#9467bd']) This scale can be passed to the color encoding to be applied to the plot style. In addition, we can customize the titles for the axis and legend to make the meaning of the plot more clear: .. altair-plot:: alt.Chart(df).mark_bar().encode( x=alt.X('month(date):N').title('Month of the year'), y='count()', color=alt.Color('weather', legend=alt.Legend(title='Weather type'), scale=scale), ) Combining the above ideas lets us create any number of flexible visualizations of this dataset. For example, here is a plot that uses the customizations we have developed above to explore the relationship between weather, precipitation, maximum temperature, and temperature range, configured to use a larger canvas and to allow interactive panning and zooming with the mouse: .. 
altair-plot:: alt.Chart(df).mark_point().encode( alt.X('temp_max').title('Maximum Daily Temperature (C)'), alt.Y('temp_range:Q').title('Daily Temperature Range (C)'), alt.Color('weather').scale(scale), alt.Size('precipitation').scale(range=[1, 200]) ).transform_calculate( "temp_range", "datum.temp_max - datum.temp_min" ).properties( width=600, height=400 ).interactive() This gives us even more insight into the weather patterns in Seattle: rainy and foggy days tend to be cooler with a narrower range of temperatures, while warmer days tend to be dry and sunny, with a wider spread between low and high temperature. You can take this even further using Altair's building blocks for multi-panel charts and interactions. For example, we might construct a histogram of days by weather type: .. altair-plot:: alt.Chart(df).mark_bar().encode( x='count()', y='weather:N', color=alt.Color('weather:N').scale(scale), ) And now we can vertically concatenate this histogram to the points plot above, and add a brush selection tool such that the histogram reflects the content of the selection (for more information on selections, see :ref:`user-guide-interactions`): .. 
altair-plot:: brush = alt.selection_interval() color = alt.Color("weather:N").scale(scale) temp_range = alt.datum["temp_max"] - alt.datum["temp_min"] points = alt.Chart(width=600, height=400).mark_point().encode( alt.X("temp_max:Q").title("Maximum Daily Temperature (C)"), alt.Y("temp_range:Q").title("Daily Temperature Range (C)"), color=alt.when(brush).then(color).otherwise(alt.value("lightgray")), size=alt.Size("precipitation:Q").scale(range=[1, 200]), ).transform_calculate( temp_range=temp_range ).add_params( brush ) bars = alt.Chart(width=600).mark_bar().encode( x="count()", y="weather:N", color=color ).transform_calculate( temp_range=temp_range ).transform_filter( brush ) alt.vconcat(points, bars, data=df) This chart, containing concatenations, data transformations, selections, and customized axes labels and data scales, shows the power of the grammar behind Altair: you can create a complex chart from a small number of building blocks. This is the end of this tutorial where you have seen various ways to bin and aggregate data, derive new fields, and customize your charts. You can find more visualizations in the :ref:`example-gallery`. If you want to further customize your charts, you can refer to Altair's :ref:`api`. .. _pandas: http://pandas.pydata.org/ ================================================ FILE: doc/case_studies/index.rst ================================================ Tutorials --------- These tutorials explore more advanced use cases than the gallery. .. toctree:: :hidden: exploring-weather numpy-tooltip-images ================================================ FILE: doc/case_studies/numpy-tooltip-images.rst ================================================ .. _numpy-tooltip-imgs: Displaying Numpy Images in Tooltips ----------------------------------- In this tutorial, you’ll learn how to display images stored as Numpy arrays in tooltips with any Altair chart. 
First, we create some example image arrays with blobs (objects) of different sizes and shapes (circular and square). We measure the area of the blobs in order to have a quantitative measurement to compare them with in our charts. .. altair-plot:: :output: repr import numpy as np import pandas as pd from scipy import ndimage as ndi rng = np.random.default_rng([ord(c) for c in 'altair']) n_rows = 200 def create_blobs(blob_shape, img_width=96, n_dim=2, sizes=[0.05, 0.1, 0.15]): """Helper function to create blobs in the images""" shape = tuple([img_width] * n_dim) mask = np.zeros(shape) points = (img_width * rng.random(n_dim)).astype(int) mask[tuple(indices for indices in points)] = 1 if blob_shape == 'circle': im = ndi.gaussian_filter(mask, sigma=rng.choice(sizes) * img_width) elif blob_shape == 'square': im = ndi.uniform_filter(mask, size=rng.choice(sizes) * img_width, mode='constant') * rng.normal(4, size=(img_width, img_width)) return im / im.max() df = pd.DataFrame({ 'image1': [create_blobs('circle') for _ in range(n_rows)], 'image2': [create_blobs('square', sizes=[0.3, 0.4, 0.5]) for _ in range(n_rows)], 'group': rng.choice(['a', 'b', 'c'], size=n_rows) }) # Compute the area as the proportion of pixels above a threshold df[['image1_area', 'image2_area']] = df[['image1', 'image2']].map(lambda x: (x > 0.4).mean()) df Next, we define the function that will convert the Numpy arrays to base64-encoded_ strings. This is a necessary step for the tooltip to recognize that the data is in the form of an image and render it appropriately. .. 
altair-plot:: :output: repr from io import BytesIO from PIL import Image, ImageDraw import base64 def create_tooltip_image(df_row): """Concatenate, rescale, and convert images to base64 strings.""" # Concatenate images to show together in the tooltip # This can be skipped if only one image is to be displayed img_gap = np.ones([df_row['image1'].shape[0], 10]) # 10 px white gap between imgs img_arr = np.concatenate( [ df_row['image1'], img_gap, df_row['image2'] ], axis=1 ) # Create a PIL image from the array. # Multiplying by 255 and recasting as uint8 for the images to occupy the entire supported intensity space from 0-255 img = Image.fromarray((255 * img_arr).astype('uint8')) # Optional: Burn in labels as pixels in the images. Can be helpful to keep track of which image is which ImageDraw.Draw(img).text((3, 0), 'im1', fill=255) ImageDraw.Draw(img).text((3 + df_row['image1'].shape[1] + img_gap.shape[1], 0), 'im2', fill=255) # Convert to base64 encoded image string that can be displayed in the tooltip buffered = BytesIO() img.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode() return f"data:image/png;base64,{img_str}" # The column with the base64 image string must be called "image" in order for it to trigger the image rendering in the tooltip df['image'] = df[['image1', 'image2']].apply(create_tooltip_image, axis=1) # Dropping the image arrays since they are large and no longer needed df_plot = df.drop(columns=['image1', 'image2']) df_plot Now we are ready to create the charts that show the images as tooltips when the mouse hovers over the dots. We can see that the large white blobs correspond to the higher area measurements as expected. ..
altair-plot:: import altair as alt # The random() function is used to jitter points in the x-direction alt.Chart(df_plot, width=alt.Step(40)).mark_circle(xOffset=alt.expr('random() * 16 - 8')).encode( x='group', y=alt.Y(alt.repeat(), type='quantitative'), tooltip=['image'], color='group', ).repeat( ['image1_area', 'image2_area'] ).resolve_scale( y='shared' ).properties( title='Comparison of blob areas' ) Note that when including images as part of the chart data, the chart size often increases several-fold. The size of the chart above would have been 19 Kb without the images, but with the images added it is 760 Kb. While this is a 20x size increase, the base64 encoding is still quite storage efficient; if we would have included the images in their original Numpy array format the chart size would have been 35Mb! If we want to have even more fun and get a bit more sophisticated, we could show one chart at a time and update what is shown on the y-axis as well as what is shown in the image tooltip based on a dropdown selector. We start by defining a tooltip that only contains a single image instead of both the images concatenated together. .. altair-plot:: :output: repr def create_tooltip_image(img_arr): """Rescale and convert an image to a base64 string.""" # print(img_arr) # Create a PIL image from the array. 
# Multiplying by 255 and recasting as uint8 for the images to occupy the entire supported intensity space from 0-255 img = Image.fromarray((255 * img_arr).astype('uint8')) # Convert to base64 encoded image string that can be displayed in the tooltip buffered = BytesIO() img.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode() return f"data:image/png;base64,{img_str}" # Store each image as a base64 string; the chart below copies the selected one into a field called "image", which is the name that triggers the image rendering in the tooltip df[['image1_base64', 'image2_base64']] = df[['image1', 'image2']].map(create_tooltip_image) # Dropping the image arrays since they are large and no longer needed # Also drop the previous tooltip image for clarity df_plot = df.drop(columns=['image1', 'image2', 'image']) df_plot In our chart, we need to use a transform to update both the y-axis column as well as the tooltip column dynamically based on the selection in the dropdown. The comments in the code explain more in detail what each line in this chart specification does. ..
altair-plot:: metric_dropdown = alt.binding_select( options=['image1_area', 'image2_area'], name='Image metric ' ) metric_param = alt.param( value='image1_area', bind=metric_dropdown ) alt.hconcat( # This first chart is the axis title and is only needed because # Vega-Lite does not yet support passing an expression directly to the axis title alt.Chart().mark_text(angle=270, dx=-150, fontWeight='bold').encode( alt.TextValue(alt.expr(f'{metric_param.name}')) ), alt.Chart(df_plot, width=alt.Step(40)).mark_circle(xOffset=alt.expr('random() * 16 - 8')).encode( x='group', y=alt.Y('image_area:Q').title(''), tooltip=['image:N'], color='group', ).properties( title='Area of blobs' ).transform_calculate( # This first line updates the image_area which is used for the y axis # to correspond to the selected string in the dropdown image_area=f'datum[{metric_param.name}]', # Since altair needs the tooltip field to be called `image`, we need to dynamically # change what's in the `image` field depending on the selection in the dropdown # This is further complicated by the fact that the string in the dropdown is not # an exact match for the column holding the image data so we need # to replace part of the name to match the corresponding base64 image field image=f'datum[replace({metric_param.name}, "_area", "_base64")]', ) ).add_params( metric_param ) .. _base64-encoded: https://en.wikipedia.org/wiki/Binary-to-text_encoding ================================================ FILE: doc/conf.py ================================================ # !/usr/bin/env python3 # # altair documentation build configuration file, created by # sphinx-quickstart on Wed Sep 7 12:52:48 2016. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default.
import os import sys from datetime import datetime # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) # noqa: PTH100 # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.doctest", "sphinx.ext.coverage", "sphinx.ext.githubpages", "numpydoc.numpydoc", "sphinxext_altair.altairplot", "sphinxext.altairgallery", "sphinxext.schematable", "sphinxext.code_ref", "sphinx_copybutton", "sphinx_design", ] altair_plot_links = {"editor": True, "source": False, "export": False} autodoc_default_flags = ["members", "inherited-members"] autodoc_member_order = "groupwise" autodoc_typehints = "none" # generate autosummary even if no references autosummary_generate = True # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] source_suffix = {".rst": "restructuredtext"} # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = "index" # General information about the project. project = "Vega-Altair" copyright = f"2015-{datetime.now().year}, Vega-Altair Developers" author = "Vega-Altair Developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. 
version = "6.1.0dev" # The full version, including alpha/beta/rc tags. release = f"{version}" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = "en" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The reST default role (used for this markup: `text`) to use for all # documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. # pygments_style = 'colorful' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. # keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "pydata_sphinx_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the # documentation. html_theme_options = { "navbar_start": ["navbar-logo", "navbar-project"], "navbar_center": ["navbar-nav"], "navbar_end": ["theme-switcher", "navbar-icon-links"], "primary_sidebar_end": [], "icon_links": [ { "name": "GitHub", "url": "https://github.com/vega/altair", "icon": "fab fa-github fa-lg", "type": "fontawesome", }, { "name": "StackOverflow", "url": "https://stackoverflow.com/tags/altair", "icon": "fab fa-stack-overflow fa-xl", "type": "fontawesome", }, ], "header_links_before_dropdown": 4, "analytics": { "plausible_analytics_domain": "altair-viz.github.io", "plausible_analytics_url": ("https://views.scientific-python.org/js/script.js"), }, } html_context = {"default_mode": "light"} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. # html_title = 'altair v1.0.0' # A shorter title for the navigation bar. Default is the same as html_title. html_short_title = "Altair" # The name of an image file (relative to this directory) to place at the top # of the sidebar. html_logo = "_static/altair-logo-light.png" # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. html_favicon = "_static/favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ["_static", "_images"] # adapted from: http://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html # and # https://github.com/rtfd/sphinx_rtd_theme/issues/117 def setup(app): app.add_css_file("theme_overrides.css") app.add_css_file("custom.css") # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. # html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. html_sidebars = { "index": [], "**": ["sidebar-nav-bs"], } # Redirection of old page locations via the rediraffe sphinx-extension # It seems like only pages can be redirected, not headings within pages # rediraffe_redirects = { # 'case_studies/exploring-weather.rst': 'user_guide/case_studies/exploring-weather.rst' # } # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. html_show_sourcelink = False # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. 
The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' # html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. # html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. htmlhelp_basename = "altairdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # 'preamble': '', # Latex figure (float) alignment # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ( master_doc, "altair.tex", "altair Documentation", "Altair Developers", "manual", ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. 
# latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, "altair", "altair Documentation", [author], 1)] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, "altair", "altair Documentation", author, "altair", "One line description of project.", "Miscellaneous", ), ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False # Hide extra class members numpydoc_show_class_members = False # For the altairplot extension altairplot_links = {"editor": True, "source": True, "export": True} # Defaults for below are drawn from Altair; override here. # altairplot_vega_js_url = "https://cdn.jsdelivr.net/npm/vega@5" # altairplot_vegalite_js_url = "https://cdn.jsdelivr.net/npm/vega-lite@4" # altairplot_vegaembed_js_url = "https://cdn.jsdelivr.net/npm/vega-embed@7" ================================================ FILE: doc/getting_started/getting_help.rst ================================================ Getting Help ============ Altair is BSD-licensed and the source is available on `GitHub`_, where you can also report `bugs and feature requests`_. 
For general questions, please ask on `StackOverflow`_ using the `altair` tag. You can browse this documentation via the links in the top navigation panel or by viewing the full site :ref:`genindex`. In addition to reading this documentation page, it can be helpful to also browse the `Vega-Lite documentation <https://vega.github.io/vega-lite/>`_. .. _GitHub: http://github.com/vega/altair .. _Git Issues: http://github.com/vega/altair/issues .. _Vega: http://vega.github.io/vega .. _Vega-Lite: http://vega.github.io/vega-lite .. _bugs and feature requests: https://github.com/vega/altair/issues/new/choose .. _StackOverflow: https://stackoverflow.com/tags/altair ================================================ FILE: doc/getting_started/installation.rst ================================================ .. currentmodule:: altair .. _installation: Installation ============ Altair can be installed, along with all its optional dependencies, using: .. code-block:: bash pip install "altair[all]" If you are using the conda_ package manager, the equivalent is: .. code-block:: bash conda install -c conda-forge altair-all At this point, you should be able to open any IDE compatible with Jupyter Notebooks, and execute any of the code from the :ref:`example-gallery`. For more information on how to display charts in various notebook environments and non-notebook IDEs, see :ref:`displaying-charts`. If you wish to install Altair with only the required dependencies, you can omit the ``[all]``/``-all`` suffix. Altair can also be installed with just the dependencies necessary for saving charts to offline HTML files or PNG/SVG/PDF formats, using: .. code-block:: bash pip install "altair[save]" Installing Altair in WASM / Pyodide environments ------------------------------------------------ Altair is included in the official Pyodide distribution.
The version of Altair available in Pyodide can be found in the Pyodide package list: https://pyodide.org/en/stable/usage/packages-in-pyodide.html Altair can also be installed in browser-based Python environments such as Pyodide, PyScript, or other WebAssembly (WASM) runtimes using ``micropip``. For example, in a Pyodide-based environment: .. code-block:: python import micropip await micropip.install("altair") To install a specific version of Altair, specify the version explicitly: .. code-block:: python await micropip.install("altair==6.0.0") Development Installation ======================== Please see `CONTRIBUTING.md <https://github.com/vega/altair/blob/main/CONTRIBUTING.md>`_ for details on how to contribute to the Altair project. .. _conda: https://docs.conda.io/ .. _Vega-Lite: http://vega.github.io/vega-lite .. _JupyterLab: http://jupyterlab.readthedocs.io/ .. _Jupyter Notebook: https://jupyter-notebook.readthedocs.io/ ================================================ FILE: doc/getting_started/overview.rst ================================================ .. _overview: Overview ======== Vega-Altair is a declarative statistical visualization library for Python, based on Vega_ and Vega-Lite_. It offers a powerful and concise grammar that enables you to quickly build a wide range of statistical visualizations. Here is an example of using the API to visualize a dataset with an interactive scatter plot: .. altair-plot:: # import altair with an abbreviated alias import altair as alt # load a sample dataset as a pandas DataFrame from altair.datasets import data cars = data.cars() # make the chart alt.Chart(cars).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color='Origin', ).interactive() The key idea is that you are declaring links between *data columns* and *visual encoding channels*, such as the x-axis, y-axis and color. The rest of the plot details are handled automatically. Building on this declarative system, a surprising range of plots, from simple to sophisticated, can be created using a concise grammar.
The project is named after the `brightest star `_ in the constellation Aquila. From Earth's sky Altair appears close to Vega, the star from which our parent project drew its name. This documentation serves as the main reference for learning about Altair. Additional learning material and tutorials can be found in the :ref:`learning-resources` section. It can also be helpful to browse the `Vega-Lite documentation `_. .. _Vega: http://vega.github.io/vega .. _Vega-Lite: http://vega.github.io/vega-lite .. toctree:: :maxdepth: 1 :caption: Getting Started :hidden: self installation starting getting_help resources project_philosophy ================================================ FILE: doc/getting_started/project_philosophy.rst ================================================ Project Philosophy ================== Many excellent plotting libraries exist in Python, including: * `Matplotlib `_ * `Bokeh `_ * `Seaborn `_ * `Lightning `_ * `Plotly `_ * `pandas built-in plotting `_ * `HoloViews `_ * `VisPy `_ * `pygg `_ Each library does a particular set of things well. User Challenges --------------- However, such a proliferation of options creates great difficulty for users as they have to wade through all of these APIs to find which of them is the best for the task at hand. None of these libraries are optimized for high-level statistical visualization, so users have to assemble their own using a mishmash of APIs. For individuals just learning to work with data, this forces them to focus on learning APIs rather than exploring their data. Another challenge is current plotting APIs require the user to write code, even for incidental details of a visualization. This results in an unfortunate and unnecessary cognitive burden as the visualization type (histogram, scatterplot, etc.) can often be inferred using basic information such as the columns of interest and the data types of those columns. 
For example, if you are interested in the visualization of two numerical columns, a scatterplot is almost certainly a good starting point. If you add a categorical column to that, you probably want to encode that column using colors or facets. If inferring the visualization proves difficult at times, a simple user interface can construct a visualization without any coding. `Tableau `_ and the `Interactive Data Lab's `_ `Polestar `_ and `Voyager `_ are excellent examples of such UIs. Design Approach and Solution ---------------------------- We believe that these challenges can be addressed without the creation of yet another visualization library that has a programmatic API and built-in rendering. Vega-Altair's approach to building visualizations uses a layered design that leverages the full capabilities of existing visualization libraries: 1. Create a constrained, simple Python API (Vega-Altair) that is purely declarative 2. Use the API (Vega-Altair) to emit JSON output that follows the Vega-Lite spec 3. Render that spec using existing visualization libraries This approach enables users to perform exploratory visualizations with a much simpler API initially, pick an appropriate renderer for their usage case, and then leverage the full capabilities of that renderer for more advanced plot customization. We realize that a declarative API will necessarily be limited compared to the full programmatic APIs of Matplotlib, Bokeh, etc. That is a deliberate design choice we feel is needed to simplify the user experience of exploratory visualization. You can find a more detailed comparison between Plotly and Altair in `this StackOverflow answer `_. ================================================ FILE: doc/getting_started/resources.rst ================================================ .. _resources: Resources ========= We hope to make it easier to find learning resources and projects related to Altair by listing them here. 
If you know of a project that should be added, please let us know by opening an `Issue on GitHub `_. .. _learning-resources: Learning Material ----------------- This is a list of learning material that complements the official documentation and can help you learn more about how to use Altair. `Visualization Curriculum`_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ A data visualization curriculum from the UW data group that developed Vega-Lite. .. List of links. .. _`Visualization Curriculum`: https://uwdata.github.io/visualization-curriculum `Jupyter Notebook Tutorials`_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Jupyter Notebook tutorials and examples from the Altair authors. .. List of links. .. _`Jupyter Notebook Tutorials`: https://github.com/altair-viz/altair_notebooks `Pycon Tutorial`_ ~~~~~~~~~~~~~~~~~ Altair tutorial given at PyCon 2018 by the Altair author Jake VanderPlas. .. List of links. .. _`Pycon tutorial`: https://altair-viz.github.io/altair-tutorial `Data Visualization Course`_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This course covers how to create common statistical visualizations, tell stories with data, create geographical visualizations, and bring plots to life by adding interactive elements. Created at the University of British Columbia and can either be audited or taken as part of the `Key Capabilities for Data Science`_ certificate program. .. List of links. .. _`Data Visualization Course`: https://viz-learn.mds.ubc.ca .. _`Key Capabilities for Data Science`: https://extendedlearning.ubc.ca/programs/key-capabilities-data-science `Brief Introduction Videos`_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Calmcode provides a few videos that give a brief overview of the Altair library. .. List of links. .. _`Brief Introduction Videos`: https://calmcode.io/altair/introduction.html .. _altair-ecosystem: Related Projects ---------------- This is a list of projects which are directly related to Altair. There are many other packages that can be used in tandem with Altair, e.g. 
`dashboard packages which you can read more about in the answers to this StackOverflow question`_. .. List of links. .. _`dashboard packages which you can read more about in the answers to this StackOverflow question`: https://stackoverflow.com/questions/49833866/making-dashboards-using-altair Vega-Lite_ ~~~~~~~~~~ The higher-level visualization grammar that Altair implements in Python. .. List of links. .. _Vega-Lite: https://vega.github.io/vega-lite vl-convert_ ~~~~~~~~~~~ Python library for converting Altair/Vega-Lite chart specifications into static images (SVG or PNG) or Vega chart specifications without any external dependencies. .. List of links. .. _vl-convert: https://github.com/vega/vl-convert VegaFusion_ ~~~~~~~~~~~ VegaFusion provides server-side scaling for Altair charts, which can accelerate interactive charts, extract transformed data, and perform data-intensive aggregations on the server and prune unused columns from the source dataset yielding smaller size visualizations. .. List of links. .. _VegaFusion: https://vegafusion.io/ altair_data_server_ ~~~~~~~~~~~~~~~~~~~ Data transformer plugin that transparently serves data for charts. .. List of links. .. _altair_data_server: https://github.com/altair-viz/altair_data_server altair_pandas_ ~~~~~~~~~~~~~~ Altair backend for the pandas plotting API. .. List of links. .. _altair_pandas: https://github.com/altair-viz/altair_pandas altair_recipes_ ~~~~~~~~~~~~~~~ altair_recipes provides a collection of ready-made statistical graphics for Altair. See the `docs `__. .. List of links. .. _altair_recipes: https://github.com/piccolbo/altair_recipes nx_altair_ ~~~~~~~~~~ nx_altair is a library for drawing NetworkX_ graphs using Altair. It offers a similar draw API as NetworkX but returns Altair Charts instead. This allows users to apply Altair's rich interactive API to networks graphs. See the `docs `__. .. List of links. .. _nx_altair: https://github.com/Zsailer/nx_altair .. 
_NetworkX: https://networkx.github.io/ `Altair Ally`_ ~~~~~~~~~~~~~~ Altair Ally is a companion package to Altair, which provides a few shortcuts to create common plots for exploratory data analysis, particularly those involving visualization of an entire dataframe. .. List of links. .. _`Altair Ally`: https://github.com/vega/altair_ally gif_ ~~~~ gif is the extension for Altair and matplotlib animations. The library provides a simple, high-level decorator interface to create frames in a regular for-loop that can be stitched together on save. See the `docs `__. .. List of links. .. _gif: https://github.com/maxhumber/gif `Altair in R`_ ~~~~~~~~~~~~~~ Altair in R provides an R interface to the Altair Python package. See the `docs `__. .. List of links. .. _`Altair in R`: https://github.com/vegawidget/altair Altair-upset_ ~~~~~~~~~~~~~ Create beautiful and interactive UpSet plots using Altair. UpSet plots are a powerful alternative to Venn diagrams for visualizing set intersections, especially when dealing with many sets. The library supports both Pandas and Polars DataFrames, making it flexible for different data processing workflows. .. List of links. .. _altair-upset: https://altair-upset.readthedocs.io/en/latest/ .. _UpSet Plots: https://upset.app ================================================ FILE: doc/getting_started/starting.rst ================================================ .. _starting: Basic Statistical Visualization =================================== (This tutorial is adapted from `Vega-Lite's documentation `_) .. currentmodule:: altair This tutorial will guide you through the basic process of creating visualizations in Altair. First, you will need to make sure you have the Altair package and its dependencies installed (see :ref:`installation`). This tutorial will assume you are working within a Jupyter notebook user interface (such as JupyterLab, Colab or VS Code), so that plots are automatically rendered. 
If you are using another interface, you may want to read about how Altair plots are displayed before proceeding (see :ref:`displaying-charts`). Here is the outline of this basic tutorial: - :ref:`basic-tutorial-data` - :ref:`basic-tutorial-encodings-and-marks` - :ref:`basic-tutorial-aggregation` - :ref:`basic-tutorial-customization` - :ref:`basic-tutorial-publishing` .. _basic-tutorial-data: The Data -------- Data in Altair is built around the pandas Dataframe. One of the defining characteristics of statistical visualization is that it begins with `tidy `_ Dataframes. For the purposes of this tutorial, we'll start by importing pandas and creating a simple DataFrame to visualize, with a categorical variable in column a and a numerical variable in column b: .. altair-plot:: :output: none import pandas as pd data = pd.DataFrame({'a': list('CCCDDDEEE'), 'b': [2, 7, 4, 1, 2, 6, 8, 4, 7]}) When using Altair, datasets are most commonly provided as a Dataframe. As we will see, the labeled columns of the dataframe are an essential piece of plotting with Altair. .. _basic-tutorial-chart-object: The Chart Object ---------------- The fundamental object in Altair is the :class:`Chart`, which takes a dataframe as a single argument: .. altair-plot:: :output: none import altair as alt chart = alt.Chart(data) So far, we have defined the Chart object, but we have not yet told the chart to *do* anything with the data. That will come next. .. _basic-tutorial-encodings-and-marks: Encodings and Marks ------------------- With this chart object in hand, we can now specify how we would like the data to be visualized. This is done via the ``mark`` attribute of the chart object, which is most conveniently accessed via the ``Chart.mark_*`` methods. For example, we can show the data as a point using :meth:`~Chart.mark_point`: .. 
altair-plot:: alt.Chart(data).mark_point() Here the rendering consists of one point per row in the dataset, all plotted on top of each other, since we have not yet specified positions for these points. To visually separate the points, we can map various *encoding channels*, or *channels* for short, to columns in the dataset. For example, we could *encode* the variable ``a`` of the data with the ``x`` channel, which represents the x-axis position of the points. This can be done straightforwardly via the :meth:`Chart.encode` method: .. altair-plot:: alt.Chart(data).mark_point().encode( x='a', ) The ``encode()`` method builds a key-value mapping between encoding channels (such as ``x``, ``y``, ``color``, ``shape``, ``size``, etc.) to columns in the dataset, accessed by column name. For pandas dataframes, Altair automatically determines the appropriate data type for the mapped column, which in this case is a *nominal* value, or an unordered categorical. Though we've now separated the data by one attribute, we still have multiple points overlapping within each category. Let's further separate these by adding a ``y`` encoding channel, mapped to the ``"b"`` column: .. altair-plot:: alt.Chart(data).mark_point().encode( x='a', y='b' ) The type of the data in the ``"b"`` column is again automatically-inferred by Altair, and this time is treated as a *quantitative* type (i.e. real-valued). Additionally, we see that grid lines and appropriate axis titles are automatically added as well. .. _basic-tutorial-aggregation: Data Transformation: Aggregation -------------------------------- To allow for more flexibility in how data are visualized, Altair has a built-in syntax for *aggregation* of data. For example, we can compute the average of all values by specifying this aggregate within the column identifier: .. 
altair-plot:: alt.Chart(data).mark_point().encode( x='a', y='average(b)' ) Now within each x-axis category, we see a single point reflecting the average of the values within that category. Typically, aggregated values are not represented by point markings, but by bar markings. We can do this by replacing :meth:`~Chart.mark_point` with :meth:`~Chart.mark_bar`: .. altair-plot:: alt.Chart(data).mark_bar().encode( x='a', y='average(b)' ) Because the categorical feature is mapped to the ``x``-axis, the result is a vertical bar chart. To get a horizontal bar chart, all we need is to swap the ``x`` and ``y`` keywords: .. altair-plot:: alt.Chart(data).mark_bar().encode( y='a', x='average(b)' ) Aside: Examining the JSON Output ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Recall that Altair's main purpose is to convert plot specifications to a JSON string that conforms to the Vega-Lite schema. It is instructive here to use the :meth:`~Chart.to_json` method to inspect the JSON specification that Altair is exporting and sending as JSON to Vega-Lite: .. altair-plot:: :output: stdout chart = alt.Chart(data).mark_bar().encode( x='a', y='average(b)', ) print(chart.to_json()) Notice here that ``encode(x='a')`` has been expanded to a JSON structure with a ``field`` name, and a ``type`` for the data. The ``encode(y='average(b)')`` has been expanded similarly and includes an ``aggregate`` field. Altair's full shorthand syntax includes a way to specify the type of the column as well: .. altair-plot:: :output: stdout y = alt.Y('average(b):Q') print(y.to_json()) This short-hand is equivalent to spelling-out the parameters by name: .. altair-plot:: :output: repr y = alt.Y(field='b', type='quantitative', aggregate='average') print(y.to_json()) This more verbose means of specifying channels can be used directly in Altair chart specifications, a fact that becomes useful when using some of the more advanced field configurations: ..
altair-plot:: alt.Chart(data).mark_bar().encode( alt.Y('a', type='nominal'), alt.X('b', type='quantitative', aggregate='average') ) .. _basic-tutorial-customization: Customizing your Visualization ------------------------------ By default, Altair via Vega-Lite makes some choices about default properties of the visualization. Altair also provides an API to customize the look of the visualization. For example, we can specify the axis titles using the :meth:`title` method of channel classes, and we can specify the color of the mark by setting the ``color`` keyword of the ``Chart.mark_*`` method to any valid HTML color string: .. altair-plot:: alt.Chart(data).mark_bar(color='firebrick').encode( alt.Y('a').title('category'), alt.X('average(b)').title('avg(b) by category') ) .. _basic-tutorial-publishing: Publishing your Visualization ----------------------------- Once you have visualized your data, perhaps you would like to publish it somewhere on the web. This can be done straightforwardly using the Vega-Embed_ Javascript package. A simple example of a stand-alone HTML document can be generated for any chart using the :meth:`Chart.save` method: .. code-block:: python chart = alt.Chart(data).mark_bar().encode( x='a', y='average(b)', ) chart.save('chart.html') The basic HTML template produces output that looks like this, where the JSON specification for your plot produced by :meth:`Chart.to_json` should be stored in the ``spec`` Javascript variable: .. code-block:: html
The :meth:`~Chart.save` method provides a convenient way to save such HTML output to file. For more information on embedding Altair/Vega-Lite, see the documentation of the Vega-Embed_ project. .. _Vega-Embed: https://github.com/vega/vega-embed ================================================ FILE: doc/index.rst ================================================ :html_theme.sidebar_secondary.remove: Vega-Altair: Declarative Visualization in Python ================================================ .. role:: raw-html(raw) :format: html .. altair-minigallery:: :names: one_dot_per_zipcode, horizon_graph, world_projections, candlestick_chart, falkensee, errorbars_with_ci, scatter_linked_brush, line_with_ci, natural_disasters, bar_rounded, streamgraph, multiline_tooltip, choropleth, select_detail, interactive_cross_highlight, seattle_weather_interactive, london_tube, ridgeline_plot, violin_plot, strip_plot, table_bubble_plot_github, radial_chart, boxplot, mosaic_with_labels :size: 24 .. rst-class:: lead **Vega-Altair** is a declarative visualization library for Python. Its simple, friendly and consistent API, built on top of the powerful Vega-Lite_ grammar, empowers you to spend less time writing code and more time exploring your data. .. grid:: 1 1 2 2 :padding: 0 2 3 5 :gutter: 2 2 3 3 :class-container: startpage-grid .. grid-item-card:: Getting Started :link: overview :link-type: ref :link-alt: Getting started In the Getting Started section you can find installation instructions and a high-level overview of the main concepts. .. grid-item-card:: User Guide :link: user-guide-data :link-type: ref :link-alt: User guide Check out the User Guides for in-depth information on the key concepts of Vega-Altair. .. grid-item-card:: Examples :link: example-gallery :link-type: ref :link-alt: Examples The Examples gallery contains a selection of different visualizations which you can create with Vega-Altair. .. 
grid-item-card:: API :link: api :link-type: ref :link-alt: api The API reference guide contains detailed information on all of Vega-Altair's methods and classes. *The Vega-Altair open-source project is not affiliated with Altair Engineering, Inc.* .. toctree:: :maxdepth: 1 :hidden: Getting Started <getting_started/overview> User Guide <user_guide/data> Examples <gallery/index> API <user_guide/api> Release Notes <releases/changes> About <about/roadmap> .. _GitHub: http://github.com/vega/altair .. _Git Issues: http://github.com/vega/altair/issues .. _Vega-Lite: http://vega.github.io/vega-lite .. _bugs and feature requests: https://github.com/vega/altair/issues/new/choose .. _StackOverflow: https://stackoverflow.com/tags/altair ================================================ FILE: doc/releases/changes.rst ================================================ :orphan: Release Notes ============= We have moved the release notes to GitHub. You can now find them `here <https://github.com/vega/altair/releases>`_. ================================================ FILE: doc/user_guide/api.rst ================================================ .. _api: API Reference ============= This is the class and function reference of Altair, and the following content is generated automatically from the code documentation strings. Please refer to the `full user guide <https://altair-viz.github.io>`_ for further details, as this low-level documentation may not be enough to give full guidelines on their use. .. _api-toplevel: Top-Level Objects ----------------- .. currentmodule:: altair .. autosummary:: :toctree: generated/toplevel/ :nosignatures: Chart ConcatChart FacetChart HConcatChart LayerChart RepeatChart TopLevelMixin VConcatChart .. _api-channels: Encoding Channels ----------------- .. currentmodule:: altair ..
autosummary:: :toctree: generated/channels/ :nosignatures: Angle AngleDatum AngleValue Color ColorDatum ColorValue Column Description DescriptionValue Detail Facet Fill FillDatum FillOpacity FillOpacityDatum FillOpacityValue FillValue Href HrefValue Key Latitude Latitude2 Latitude2Datum Latitude2Value LatitudeDatum Longitude Longitude2 Longitude2Datum Longitude2Value LongitudeDatum Opacity OpacityDatum OpacityValue Order OrderValue Radius Radius2 Radius2Datum Radius2Value RadiusDatum RadiusValue Row Shape ShapeDatum ShapeValue Size SizeDatum SizeValue Stroke StrokeDash StrokeDashDatum StrokeDashValue StrokeDatum StrokeOpacity StrokeOpacityDatum StrokeOpacityValue StrokeValue StrokeWidth StrokeWidthDatum StrokeWidthValue Text TextDatum TextValue Theta Theta2 Theta2Datum Theta2Value ThetaDatum ThetaValue Time Tooltip TooltipValue Url UrlValue X X2 X2Datum X2Value XDatum XError XError2 XError2Value XErrorValue XOffset XOffsetDatum XOffsetValue XValue Y Y2 Y2Datum Y2Value YDatum YError YError2 YError2Value YErrorValue YOffset YOffsetDatum YOffsetValue YValue .. _api-functions: API Functions ------------- .. currentmodule:: altair .. autosummary:: :toctree: generated/api/ :nosignatures: binding binding_checkbox binding_radio binding_range binding_select check_fields_and_encodings concat condition graticule hconcat layer param repeat selection_interval selection_point sequence sphere topo_feature value vconcat when .. _api-theme: Theme ----- .. currentmodule:: altair.theme .. 
autosummary:: :toctree: generated/theme/ :nosignatures: active enable get names options register unregister ThemeConfig AreaConfigKwds AutoSizeParamsKwds AxisConfigKwds AxisResolveMapKwds BarConfigKwds BindCheckboxKwds BindDirectKwds BindInputKwds BindRadioSelectKwds BindRangeKwds BoxPlotConfigKwds BrushConfigKwds CompositionConfigKwds ConfigKwds DateTimeKwds DerivedStreamKwds ErrorBandConfigKwds ErrorBarConfigKwds FeatureGeometryGeoJsonPropertiesKwds FormatConfigKwds GeoJsonFeatureCollectionKwds GeoJsonFeatureKwds GeometryCollectionKwds GradientStopKwds HeaderConfigKwds IntervalSelectionConfigKwds IntervalSelectionConfigWithoutTypeKwds LegendConfigKwds LegendResolveMapKwds LegendStreamBindingKwds LineConfigKwds LineStringKwds LinearGradientKwds LocaleKwds MarkConfigKwds MergedStreamKwds MultiLineStringKwds MultiPointKwds MultiPolygonKwds NumberLocaleKwds OverlayMarkDefKwds PaddingKwds PointKwds PointSelectionConfigKwds PointSelectionConfigWithoutTypeKwds PolygonKwds ProjectionConfigKwds ProjectionKwds RadialGradientKwds RangeConfigKwds RectConfigKwds ResolveKwds RowColKwds ScaleConfigKwds ScaleInvalidDataConfigKwds ScaleResolveMapKwds SelectionConfigKwds StepKwds StyleConfigIndexKwds TickConfigKwds TimeIntervalStepKwds TimeLocaleKwds TitleConfigKwds TitleParamsKwds TooltipContentKwds TopLevelSelectionParameterKwds VariableParameterKwds ViewBackgroundKwds ViewConfigKwds .. _api-core: Low-Level Schema Wrappers ------------------------- .. currentmodule:: altair .. 
autosummary:: :toctree: generated/core/ :nosignatures: Aggregate AggregateOp AggregateTransform AggregatedFieldDef Align AllSortString AnyMark AnyMarkConfig AreaConfig ArgmaxDef ArgminDef AutoSizeParams AutosizeType Axis AxisConfig AxisOrient AxisResolveMap BBox BarConfig BaseTitleNoValueRefs Baseline BinExtent BinParams BinTransform BindCheckbox BindDirect BindInput BindRadioSelect BindRange Binding BinnedTimeUnit Blend BoxPlot BoxPlotConfig BoxPlotDef BrushConfig CalculateTransform Categorical ColorDef ColorName ColorScheme CompositeMark CompositeMarkDef CompositionConfig ConcatSpecGenericSpec ConditionalAxisColor ConditionalAxisLabelAlign ConditionalAxisLabelBaseline ConditionalAxisLabelFontStyle ConditionalAxisLabelFontWeight ConditionalAxisNumber ConditionalAxisNumberArray ConditionalAxisPropertyAlignnull ConditionalAxisPropertyColornull ConditionalAxisPropertyFontStylenull ConditionalAxisPropertyFontWeightnull ConditionalAxisPropertyTextBaselinenull ConditionalAxisPropertynumberArraynull ConditionalAxisPropertynumbernull ConditionalAxisPropertystringnull ConditionalAxisString ConditionalMarkPropFieldOrDatumDef ConditionalMarkPropFieldOrDatumDefTypeForShape ConditionalParameterMarkPropFieldOrDatumDef ConditionalParameterMarkPropFieldOrDatumDefTypeForShape ConditionalParameterStringFieldDef ConditionalParameterValueDefGradientstringnullExprRef ConditionalParameterValueDefTextExprRef ConditionalParameterValueDefnumber ConditionalParameterValueDefnumberArrayExprRef ConditionalParameterValueDefnumberExprRef ConditionalParameterValueDefstringExprRef ConditionalParameterValueDefstringnullExprRef ConditionalPredicateMarkPropFieldOrDatumDef ConditionalPredicateMarkPropFieldOrDatumDefTypeForShape ConditionalPredicateStringFieldDef ConditionalPredicateValueDefAlignnullExprRef ConditionalPredicateValueDefColornullExprRef ConditionalPredicateValueDefFontStylenullExprRef ConditionalPredicateValueDefFontWeightnullExprRef ConditionalPredicateValueDefGradientstringnullExprRef 
ConditionalPredicateValueDefTextBaselinenullExprRef ConditionalPredicateValueDefTextExprRef ConditionalPredicateValueDefnumber ConditionalPredicateValueDefnumberArrayExprRef ConditionalPredicateValueDefnumberArraynullExprRef ConditionalPredicateValueDefnumberExprRef ConditionalPredicateValueDefnumbernullExprRef ConditionalPredicateValueDefstringExprRef ConditionalPredicateValueDefstringnullExprRef ConditionalStringFieldDef ConditionalValueDefGradientstringnullExprRef ConditionalValueDefTextExprRef ConditionalValueDefnumber ConditionalValueDefnumberArrayExprRef ConditionalValueDefnumberExprRef ConditionalValueDefstringExprRef ConditionalValueDefstringnullExprRef Config CsvDataFormat Cursor Cyclical Data DataFormat DataSource Datasets DateTime DatumDef Day DensityTransform DerivedStream Dict DictInlineDataset DictSelectionInit DictSelectionInitInterval Diverging DomainUnionWith DsvDataFormat Element Encoding EncodingSortField ErrorBand ErrorBandConfig ErrorBandDef ErrorBar ErrorBarConfig ErrorBarDef ErrorBarExtent EventStream EventType Expr ExprRef ExtentTransform FacetEncodingFieldDef FacetFieldDef FacetMapping FacetSpec FacetedEncoding FacetedUnitSpec Feature FeatureCollection FeatureGeometryGeoJsonProperties Field FieldDefWithoutScale FieldEqualPredicate FieldGTEPredicate FieldGTPredicate FieldLTEPredicate FieldLTPredicate FieldName FieldOneOfPredicate FieldOrDatumDefWithConditionDatumDefGradientstringnull FieldOrDatumDefWithConditionDatumDefnumber FieldOrDatumDefWithConditionDatumDefnumberArray FieldOrDatumDefWithConditionDatumDefstringnull FieldOrDatumDefWithConditionMarkPropFieldDefGradientstringnull FieldOrDatumDefWithConditionMarkPropFieldDefTypeForShapestringnull FieldOrDatumDefWithConditionMarkPropFieldDefnumber FieldOrDatumDefWithConditionMarkPropFieldDefnumberArray FieldOrDatumDefWithConditionStringDatumDefText FieldOrDatumDefWithConditionStringFieldDefText FieldOrDatumDefWithConditionStringFieldDefstring FieldRange FieldRangePredicate FieldValidPredicate 
FilterTransform Fit FlattenTransform FoldTransform FontStyle FontWeight Format FormatConfig Generator GenericUnitSpecEncodingAnyMark GeoJsonFeature GeoJsonFeatureCollection GeoJsonProperties Geometry GeometryCollection Gradient GradientStop GraticuleGenerator GraticuleParams HConcatSpecGenericSpec Header HeaderConfig HexColor ImputeMethod ImputeParams ImputeSequence ImputeTransform InlineData InlineDataset Interpolate IntervalSelectionConfig IntervalSelectionConfigWithoutType JoinAggregateFieldDef JoinAggregateTransform JsonDataFormat LabelOverlap LatLongDef LatLongFieldDef LayerRepeatMapping LayerRepeatSpec LayerSpec LayoutAlign Legend LegendBinding LegendConfig LegendOrient LegendResolveMap LegendStreamBinding LineConfig LineString LinearGradient LocalMultiTimeUnit LocalSingleTimeUnit Locale LoessTransform LogicalAndPredicate LogicalNotPredicate LogicalOrPredicate LookupData LookupSelection LookupTransform Mark MarkConfig MarkDef MarkInvalidDataMode MarkPropDefGradientstringnull MarkPropDefnumber MarkPropDefnumberArray MarkPropDefstringnullTypeForShape MarkType MergedStream Month MultiLineString MultiPoint MultiPolygon MultiTimeUnit NamedData NonArgAggregateOp NonLayerRepeatSpec NonNormalizedSpec NumberLocale NumericArrayMarkPropDef NumericMarkPropDef OffsetDef OrderFieldDef OrderOnlyDef OrderValueDef Orient Orientation OverlayMarkDef Padding ParameterExtent ParameterName ParameterPredicate Parse ParseValue PivotTransform Point PointSelectionConfig PointSelectionConfigWithoutType PolarDef Polygon Position Position2Def PositionDatumDef PositionDatumDefBase PositionDef PositionFieldDef PositionFieldDefBase PositionValueDef Predicate PredicateComposition PrimitiveValue Projection ProjectionConfig ProjectionType QuantileTransform RadialGradient RangeConfig RangeEnum RangeRaw RangeRawArray RangeScheme RectConfig RegressionTransform RelativeBandSize RepeatMapping RepeatRef RepeatSpec Resolve ResolveMode Root RowColLayoutAlign RowColboolean RowColnumber 
RowColumnEncodingFieldDef SampleTransform Scale ScaleBinParams ScaleBins ScaleConfig ScaleDatumDef ScaleFieldDef ScaleInterpolateEnum ScaleInterpolateParams ScaleInvalidDataConfig ScaleInvalidDataShowAsValueangle ScaleInvalidDataShowAsValuecolor ScaleInvalidDataShowAsValuefill ScaleInvalidDataShowAsValuefillOpacity ScaleInvalidDataShowAsValueopacity ScaleInvalidDataShowAsValueradius ScaleInvalidDataShowAsValueshape ScaleInvalidDataShowAsValuesize ScaleInvalidDataShowAsValuestroke ScaleInvalidDataShowAsValuestrokeDash ScaleInvalidDataShowAsValuestrokeOpacity ScaleInvalidDataShowAsValuestrokeWidth ScaleInvalidDataShowAsValuetheta ScaleInvalidDataShowAsValuetime ScaleInvalidDataShowAsValuex ScaleInvalidDataShowAsValuexOffset ScaleInvalidDataShowAsValuey ScaleInvalidDataShowAsValueyOffset ScaleInvalidDataShowAsangle ScaleInvalidDataShowAscolor ScaleInvalidDataShowAsfill ScaleInvalidDataShowAsfillOpacity ScaleInvalidDataShowAsopacity ScaleInvalidDataShowAsradius ScaleInvalidDataShowAsshape ScaleInvalidDataShowAssize ScaleInvalidDataShowAsstroke ScaleInvalidDataShowAsstrokeDash ScaleInvalidDataShowAsstrokeOpacity ScaleInvalidDataShowAsstrokeWidth ScaleInvalidDataShowAstheta ScaleInvalidDataShowAstime ScaleInvalidDataShowAsx ScaleInvalidDataShowAsxOffset ScaleInvalidDataShowAsy ScaleInvalidDataShowAsyOffset ScaleResolveMap ScaleType SchemaBase SchemeParams SecondaryFieldDef SelectionConfig SelectionInit SelectionInitInterval SelectionInitIntervalMapping SelectionInitMapping SelectionParameter SelectionResolution SelectionType SequenceGenerator SequenceParams SequentialMultiHue SequentialSingleHue ShapeDef SharedEncoding SingleDefUnitChannel SingleTimeUnit Sort SortArray SortByChannel SortByChannelDesc SortByEncoding SortField SortOrder Spec SphereGenerator StackOffset StackTransform StandardType Step StepFor Stream StringFieldDef StringFieldDefWithCondition StringValueDefWithCondition StrokeCap StrokeJoin StyleConfigIndex SymbolShape TextBaseline TextDef TextDirection 
TickConfig TickCount TimeDef TimeFieldDef TimeFormatSpecifier TimeInterval TimeIntervalStep TimeLocale TimeUnit TimeUnitParams TimeUnitTransform TimeUnitTransformParams TitleAnchor TitleConfig TitleFrame TitleOrient TitleParams TooltipContent TopLevelConcatSpec TopLevelFacetSpec TopLevelHConcatSpec TopLevelLayerSpec TopLevelParameter TopLevelRepeatSpec TopLevelSelectionParameter TopLevelSpec TopLevelUnitSpec TopLevelVConcatSpec TopoDataFormat Transform Type TypeForShape TypedFieldDef URI UnitSpec UnitSpecWithFrame UrlData UtcMultiTimeUnit UtcSingleTimeUnit VConcatSpecGenericSpec ValueDefWithConditionMarkPropFieldOrDatumDefGradientstringnull ValueDefWithConditionMarkPropFieldOrDatumDefTypeForShapestringnull ValueDefWithConditionMarkPropFieldOrDatumDefnumber ValueDefWithConditionMarkPropFieldOrDatumDefnumberArray ValueDefWithConditionMarkPropFieldOrDatumDefstringnull ValueDefWithConditionStringFieldDefText ValueDefnumber ValueDefnumberwidthheightExprRef VariableParameter Vector10string Vector12string Vector2DateTime Vector2Vector2number Vector2boolean Vector2number Vector2string Vector3number Vector7string VegaLiteSchema ViewBackground ViewConfig WindowEventType WindowFieldDef WindowOnlyOp WindowTransform .. _api-cls: API Utility Classes ------------------- .. currentmodule:: altair .. autosummary:: :toctree: generated/api-cls/ :nosignatures: expr When Then ChainedWhen .. _api-typing: Typing ------ .. currentmodule:: altair.typing .. 
autosummary:: :toctree: generated/typing/ :nosignatures: ChannelAngle ChannelColor ChannelColumn ChannelDescription ChannelDetail ChannelFacet ChannelFill ChannelFillOpacity ChannelHref ChannelKey ChannelLatitude ChannelLatitude2 ChannelLongitude ChannelLongitude2 ChannelOpacity ChannelOrder ChannelRadius ChannelRadius2 ChannelRow ChannelShape ChannelSize ChannelStroke ChannelStrokeDash ChannelStrokeOpacity ChannelStrokeWidth ChannelText ChannelTheta ChannelTheta2 ChannelTooltip ChannelUrl ChannelX ChannelX2 ChannelXError ChannelXError2 ChannelXOffset ChannelY ChannelY2 ChannelYError ChannelYError2 ChannelYOffset ChartType EncodeKwds Optional is_chart_type .. _api-datasets: Datasets -------- .. currentmodule:: altair.datasets .. autosummary:: :toctree: generated/datasets/ :nosignatures: Loader data load url .. _Generic: https://typing.readthedocs.io/en/latest/spec/generics.html#generics .. _vega-datasets: https://github.com/vega/vega-datasets ================================================ FILE: doc/user_guide/compound_charts.rst ================================================ .. currentmodule:: altair .. _user-guide-compound: Layered & Multi-View Charts --------------------------- Along with the basic :class:`Chart` object, Altair provides a number of compound plot types that can be used to create stacked, layered, faceted, and repeated charts. 
They are summarized in the following tables: ====================== =============================== =================== ====================== class functional form operator form reference ====================== =============================== =================== ====================== :class:`LayerChart` ``alt.layer(chart1, chart2)`` ``chart1 + chart2`` :ref:`layer-chart` :class:`HConcatChart` ``alt.hconcat(chart1, chart2)`` ``chart1 | chart2`` :ref:`hconcat-chart` :class:`VConcatChart` ``alt.vconcat(chart1, chart2)`` ``chart1 & chart2`` :ref:`vconcat-chart` ====================== =============================== =================== ====================== ====================== ==================================== ====================== class method form reference ====================== ==================================== ====================== :class:`RepeatChart` ``chart.repeat(row, column)`` :ref:`repeat-chart` :class:`FacetChart` ``chart.facet(facet, row, column)`` :ref:`facet-chart` ====================== ==================================== ====================== .. _layer-chart: Layered Charts ~~~~~~~~~~~~~~ Layered charts allow you to overlay two different charts on the same set of axes. They can be useful, for example, when you wish to draw multiple marks for the same data; for example: .. altair-plot:: import altair as alt from altair.datasets import data stocks = data.stocks.url base = alt.Chart(stocks).encode( x='date:T', y='price:Q', color='symbol:N' ).transform_filter( alt.datum.symbol == 'GOOG' ) base.mark_line() + base.mark_point() Here we have used the ``+`` operator to create a layered chart; alternatively we could use the ``alt.layer`` function, which accepts as its arguments any number of charts: .. altair-plot:: alt.layer( base.mark_line(), base.mark_point(), base.mark_rule() ).interactive() Normally, the output of both of these patterns is a :class:`LayerChart` object, which has properties and methods similar to the :class:`Chart` object. 
If all charts share identical ``row``, ``column``, or ``facet`` encoding channels, those encodings are hoisted automatically and a :class:`FacetChart` is returned instead (see :ref:`layer-shared-facet`). Order of Layers ^^^^^^^^^^^^^^^ In a layered chart, the order of layers is determined from the order in which they are specified. For example, when creating a chart using ``layer1 + layer2`` or ``alt.layer(layer1, layer2)``, ``layer1`` will appear below ``layer2``, and ``layer2`` may obscure the marks of ``layer1``. For example, consider the following chart where we plot points on top of a heat-map: .. altair-plot:: import altair as alt from altair.datasets import data source = data.movies.url heatmap = alt.Chart(source).mark_rect().encode( alt.X('IMDB Rating:Q').bin(), alt.Y('Rotten Tomatoes Rating:Q').bin(), alt.Color('count()').scale(scheme='greenblue') ) points = alt.Chart(source).mark_circle( color='black', size=5, ).encode( x='IMDB Rating:Q', y='Rotten Tomatoes Rating:Q', ) heatmap + points If we put the two layers in the opposite order, the points will be drawn first and will be obscured by the heatmap marks: .. altair-plot:: points + heatmap If you do not see the expected output when creating a layered chart, make certain that you are ordering the layers appropriately. .. _hconcat-chart: Horizontal Concatenation ~~~~~~~~~~~~~~~~~~~~~~~~ Displaying two plots side-by-side is most generally accomplished with the :class:`HConcatChart` object, which can be created using the :func:`hconcat` function or the ``|`` operator. For example, here is a scatter-plot concatenated with a histogram showing the distribution of its points: ..
altair-plot:: import altair as alt from altair.datasets import data penguins = data.penguins.url chart1 = alt.Chart(penguins).mark_point().encode( x=alt.X('Flipper Length (mm):Q', scale=alt.Scale(zero=False)), y=alt.Y('Body Mass (g):Q', scale=alt.Scale(zero=False)), color='Species:N' ).properties( height=300, width=300 ) chart2 = alt.Chart(penguins).mark_bar().encode( x='count()', y=alt.Y('Body Mass (g):Q', bin=alt.Bin(maxbins=30)), color='Species:N' ).properties( height=300, width=100 ) chart1 | chart2 This example uses the ``|`` operator, but could similarly have been created with the :func:`hconcat` function: .. altair-plot:: alt.hconcat(chart1, chart2) The output of both of these is an :class:`HConcatChart` object, which has many of the same top-level methods and attributes as the :class:`Chart` object. Finally, keep in mind that for certain types of horizontally-concatenated charts, where each panel modifies just one aspect of the visualization, repeated and faceted charts are more convenient (see :ref:`repeat-chart` and :ref:`facet-chart` for more explanation). .. _vconcat-chart: Vertical Concatenation ~~~~~~~~~~~~~~~~~~~~~~ Similarly to :ref:`hconcat-chart` above, Altair offers vertical concatenation via the :func:`vconcat` function or the ``&`` operator. For example, here we vertically-concatenate two views of the same data, with a ``brush`` selection to add interaction: .. altair-plot:: import altair as alt from altair.datasets import data source = data.sp500.url brush = alt.selection_interval(encodings=['x']) base = alt.Chart(source).mark_area().encode( x = 'date:T', y = 'price:Q' ).properties( width=600, height=200 ) upper = base.encode(alt.X('date:T').scale(domain=brush)) lower = base.properties( height=60 ).add_params(brush) alt.vconcat(upper, lower) Note that we could just as well have used ``upper & lower`` rather than the more verbose ``alt.vconcat(upper, lower)``. 
As with horizontally-concatenated charts, keep in mind that for concatenations where only one data grouping or encoding is changing in each panel, using :ref:`repeat-chart` or :ref:`facet-chart` can be more efficient. .. _repeat-chart: Repeated Charts ~~~~~~~~~~~~~~~ The :class:`RepeatChart` object provides a convenient interface for a particular type of horizontal or vertical concatenation, in which the only difference between the concatenated panels is modification of *one or more encodings*. For example, suppose you would like to create a multi-panel scatter-plot to show different projections of a multi-dimensional dataset. Let's first create such a chart manually using ``hconcat`` and ``vconcat``, before showing how ``repeat`` can be used to build the chart more efficiently: .. altair-plot:: import altair as alt from altair.datasets import data penguins = data.penguins.url base = alt.Chart().mark_point().encode( color='Species:N' ).properties( width=200, height=200 ).interactive() chart = alt.vconcat(data=penguins) for y_encoding in ['Flipper Length (mm):Q', 'Body Mass (g):Q']: row = alt.hconcat() for x_encoding in ['Beak Length (mm):Q', 'Beak Depth (mm):Q']: row |= base.encode( x=alt.X(x_encoding, scale=alt.Scale(zero=False)), y=alt.Y(y_encoding, scale=alt.Scale(zero=False)), ) chart &= row chart In this example, we explicitly loop over different x and y encodings to create a 2 x 2 grid of charts showing different views of the data. The code is straightforward, if a bit verbose. The :class:`RepeatChart` pattern, accessible via the :meth:`Chart.repeat` method, makes this type of chart a bit easier to produce: .. 
altair-plot:: import altair as alt from altair.datasets import data penguins = data.penguins.url alt.Chart(penguins).mark_point().encode( alt.X(alt.repeat("column"), type='quantitative', scale=alt.Scale(zero=False)), alt.Y(alt.repeat("row"), type='quantitative', scale=alt.Scale(zero=False)), color='Species:N' ).properties( width=200, height=200 ).repeat( row=['Flipper Length (mm)', 'Body Mass (g)'], column=['Beak Length (mm)', 'Beak Depth (mm)'] ).interactive() The :meth:`Chart.repeat` method is the key here: it lets you specify a set of encodings for the row and/or column which can be referred to in the chart's encoding specification using ``alt.repeat('row')`` or ``alt.repeat('column')``. Another use of the ``repeat`` method is layering. In the example below, the columns ``US Gross`` and ``Worldwide Gross`` are layered on the ``y``-axis using ``alt.repeat('layer')``: .. altair-plot:: import altair as alt from altair.datasets import data source = data.movies() alt.Chart(source).mark_line().encode( x=alt.X("IMDB Rating").bin(), y=alt.Y(alt.repeat('layer')).aggregate('mean').title("Mean of US and Worldwide Gross"), color=alt.ColorDatum(alt.repeat('layer')) ).repeat(layer=["US Gross", "Worldwide Gross"]) Currently ``repeat`` can only be used for encodings (not, e.g., data transforms) but there is discussion within the Vega-Lite community about making this pattern more general in the future. .. _facet-chart: Faceted Charts ~~~~~~~~~~~~~~ Like repeated charts, faceted charts provide multiple views of a dataset. But instead of having different panels for different encodings, we have different panels for different subsets of data. For example, one panel for each of the three species of penguin in the penguins dataset. This is also called a `small multiple <https://en.wikipedia.org/wiki/Small_multiple>`_ chart, trellis chart, lattice chart, grid chart, or panel chart. We could do this manually using a filter transform along with a horizontal concatenation: ..
altair-plot:: import altair as alt from altair.datasets import data penguins = data.penguins.url base = alt.Chart(penguins).mark_point().encode( x=alt.X('Flipper Length (mm):Q').scale(zero=False), y=alt.Y('Body Mass (g):Q').scale(zero=False), color='Species:N' ).properties( width=160, height=160 ) chart = alt.hconcat() for species in ['Adelie', 'Chinstrap', 'Gentoo']: chart |= base.transform_filter(alt.datum.Species == species) chart As with the manual approach to :ref:`repeat-chart`, this is straightforward, if a bit verbose. Using ``.facet`` it becomes a bit cleaner: .. altair-plot:: alt.Chart(penguins).mark_point().encode( x=alt.X('Flipper Length (mm):Q').scale(zero=False), y=alt.Y('Body Mass (g):Q').scale(zero=False), color='Species:N' ).properties( width=180, height=180 ).facet( column='Species:N' ) For simple charts like this, there is also a ``column`` encoding channel that can give the same results: .. altair-plot:: alt.Chart(penguins).mark_point().encode( x=alt.X('Flipper Length (mm):Q').scale(zero=False), y=alt.Y('Body Mass (g):Q').scale(zero=False), color='Species:N', column='Species:N' ).properties( width=180, height=180 ) The advantage of using ``.facet`` is that it can create faceted views of more complicated compound charts. For example, here is a faceted view of a layered chart with a hover selection: .. 
altair-plot:: hover = alt.selection_point(on='pointerover', nearest=True, empty=False) when_hover = alt.when(hover) base = alt.Chart(penguins).encode( x=alt.X('Flipper Length (mm):Q').scale(zero=False), y=alt.Y('Body Mass (g):Q').scale(zero=False), color=alt.condition(hover, 'Species:N', alt.value('lightgray')) ).properties( width=180, height=180, ) points = base.mark_point().add_params(hover) text = base.mark_text(dy=-5).encode( text = 'Species:N', opacity = alt.condition(hover, alt.value(1), alt.value(0)) ) alt.layer(points, text).facet( 'Species:N', ) Though each of the above examples has faceted the data across columns, faceting across rows (or across rows *and* columns) is supported as well. .. _layer-shared-facet: Layering charts that share a facet encoding ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ When every chart passed to :func:`layer` (or combined with ``+``) carries identical ``row``, ``column``, or ``facet`` encoding channels, those encodings are hoisted automatically and a :class:`FacetChart` is returned. The two forms below produce the same Vega-Lite specification: .. altair-plot:: import altair as alt from altair.datasets import data penguins = data.penguins.url base = alt.Chart(penguins).encode( x=alt.X('Flipper Length (mm):Q').scale(zero=False), y=alt.Y('Body Mass (g):Q').scale(zero=False), row='Species:N', ) # facet encoding shared on each layer — hoisted automatically alt.layer( base.mark_point(), base.mark_line(), ) The equivalent explicit form is: .. code-block:: python base = alt.Chart(penguins).encode( x=alt.X('Flipper Length (mm):Q').scale(zero=False), y=alt.Y('Body Mass (g):Q').scale(zero=False), ) alt.layer( base.mark_point(), base.mark_line(), ).facet(row='Species:N') If the facet encodings differ across layers, or only some layers carry them, a ``TypeError`` is raised as before. ================================================ FILE: doc/user_guide/configuration.rst ================================================ ..
currentmodule:: altair .. _user-guide-configuration: Top-Level Chart Configuration ============================= Many aspects of a chart's appearance can be configured at the top level using the ``configure_*()`` methods. These methods and the properties that they set are only valid at the top level of a chart, and can be thought of as a way of setting a chart theme: that is, they set the default styles for the entire chart, and these defaults can be overridden by specific style settings associated with chart elements. These methods and their arguments will be outlined below: - :ref:`config-chart` :meth:`Chart.configure` - :ref:`config-axis` :meth:`Chart.configure_axis` - :ref:`config-header` :meth:`Chart.configure_header` - :ref:`config-legend` :meth:`Chart.configure_legend` - :ref:`config-mark` :meth:`Chart.configure_mark` - :ref:`config-scale` :meth:`Chart.configure_scale` - :ref:`config-range` :meth:`Chart.configure_range` - :ref:`config-projection` :meth:`Chart.configure_projection` - :ref:`config-composition` :meth:`Chart.configure_concat`, :meth:`Chart.configure_facet` - :ref:`config-selection` :meth:`Chart.configure_selection` - :ref:`config-title` :meth:`Chart.configure_title` - :ref:`config-view` :meth:`Chart.configure_view` For more discussion of approaches to chart customization, see :ref:`user-guide-customization`. .. _config-chart: Chart Configuration ------------------- The :meth:`Chart.configure` method adds a :class:`Config` instance to the chart, and accepts the following parameters: .. altair-object-table:: altair.Config .. _config-axis: Axis Configuration ------------------ Axis configuration defines default settings for axes and can be set using the :meth:`Chart.configure_axis` method. Properties defined here are applied to all axes in the figure. Additional property blocks can target more specific axis types based on the orientation ("axisX", "axisY", "axisLeft", "axisTop", etc.) or band scale type ("axisBand"). 
For example, properties defined under the "axisBand" property will only apply to axes visualizing "band" scales. If multiple axis config blocks apply to a single axis, type-based options take precedence over orientation-based options, which in turn take precedence over general options. The methods are the following: - :meth:`Chart.configure_axis` - :meth:`Chart.configure_axisBand` - :meth:`Chart.configure_axisBottom` - :meth:`Chart.configure_axisLeft` - :meth:`Chart.configure_axisRight` - :meth:`Chart.configure_axisTop` - :meth:`Chart.configure_axisX` - :meth:`Chart.configure_axisY` - :meth:`Chart.configure_axisDiscrete` - :meth:`Chart.configure_axisPoint` - :meth:`Chart.configure_axisQuantitative` - :meth:`Chart.configure_axisTemporal` - :meth:`Chart.configure_axisXBand` - :meth:`Chart.configure_axisXDiscrete` - :meth:`Chart.configure_axisXPoint` - :meth:`Chart.configure_axisXQuantitative` - :meth:`Chart.configure_axisXTemporal` - :meth:`Chart.configure_axisYBand` - :meth:`Chart.configure_axisYDiscrete` - :meth:`Chart.configure_axisYPoint` - :meth:`Chart.configure_axisYQuantitative` - :meth:`Chart.configure_axisYTemporal` They have the following properties: .. altair-object-table:: altair.AxisConfig .. _config-header: Header Configuration -------------------- Header configuration defines default settings for headers including the font, color, size, and position of the title and labels and can be set using the :meth:`Chart.configure_header` method. Here is an example: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', column='Origin:N' ).properties( width=180, height=180 ) chart.configure_header( titleColor='green', titleFontSize=14, labelColor='red', labelFontSize=14 ) Additional property blocks can target more specific header types. 
The methods are the following: - :meth:`Chart.configure_header` - :meth:`Chart.configure_headerColumn` - :meth:`Chart.configure_headerFacet` - :meth:`Chart.configure_headerRow` They have the following properties: .. altair-object-table:: altair.HeaderConfig .. _config-legend: Legend Configuration -------------------- The :meth:`Chart.configure_legend` method allows you to customize the appearance of chart legends, including location, fonts, bounding boxes, colors, and more. Here is an example: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) chart.configure_legend( strokeColor='gray', fillColor='#EEEEEE', padding=10, cornerRadius=10, orient='top-right' ) Additional properties are summarized in the following table: .. altair-object-table:: altair.LegendConfig .. _config-mark: Mark and Mark Style Configuration --------------------------------- The mark configuration can be set using the :meth:`Chart.configure_mark` method, which sets the default properties for all marks in the chart. In addition, the config object also provides mark-specific configuration using the mark type (e.g. :meth:`Chart.configure_area`) for defining default properties for each mark.
For general configuration of all mark types, use: - :meth:`Chart.configure_mark` For configurations specific to particular mark types, use: - :meth:`Chart.configure_arc` - :meth:`Chart.configure_area` - :meth:`Chart.configure_bar` - :meth:`Chart.configure_boxplot` - :meth:`Chart.configure_circle` - :meth:`Chart.configure_errorband` - :meth:`Chart.configure_errorbar` - :meth:`Chart.configure_geoshape` - :meth:`Chart.configure_image` - :meth:`Chart.configure_line` - :meth:`Chart.configure_point` - :meth:`Chart.configure_rect` - :meth:`Chart.configure_rule` - :meth:`Chart.configure_square` - :meth:`Chart.configure_text` - :meth:`Chart.configure_tick` - :meth:`Chart.configure_trail` Each of the above methods accepts the following properties: .. altair-object-table:: altair.MarkConfig In addition to the default mark properties above, default values can be further customized using named styles defined as keyword arguments to the :meth:`Chart.configure_style` method. Styles can then be invoked by including a style property within a mark definition object. .. _config-scale: Scale Configuration ------------------- Scales can be configured using :meth:`Chart.configure_scale`, which has the following properties: .. altair-object-table:: altair.ScaleConfig .. _config-range: Scale Range Configuration ------------------------- Scale ranges can be configured using :meth:`Chart.configure_range`, which has the following properties: .. altair-object-table:: altair.RangeConfig .. _config-projection: Projection Configuration ------------------------ Projections can be configured using :meth:`Chart.configure_projection`, which has the following properties: .. altair-object-table:: altair.ProjectionConfig .. _config-composition: Concat and Facet Configuration ------------------------------ Various aspects of concat and facet charts can be configured using :meth:`Chart.configure_concat` and :meth:`Chart.configure_facet`, which have the following properties: .. 
altair-object-table:: altair.CompositionConfig .. _config-selection: Selection Configuration ----------------------- Selections can be configured using :meth:`Chart.configure_selection`, which has the following properties: .. altair-object-table:: altair.SelectionConfig .. _config-title: Title Configuration ------------------- The :meth:`Chart.configure_title` method allows configuration of the chart title, including the font, color, placement, and orientation. Here is an example: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', ).properties( title='Cars Data' ) chart.configure_title( fontSize=20, font='Courier', anchor='start', color='gray' ) Additional title configuration options are listed in the following table: .. altair-object-table:: altair.TitleConfig .. _config-view: View Configuration ------------------ The :meth:`Chart.configure_view` method allows you to configure aspects of the chart's *view*, i.e. the area of the screen in which the data and scales are drawn. Here is an example to demonstrate some of the visual features that can be controlled: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', ) chart.configure_view( continuousHeight=200, continuousWidth=200, strokeWidth=4, fill='#FFEEDD', stroke='red', ) Additional properties are summarized in the following table: .. altair-object-table:: altair.ViewConfig ================================================ FILE: doc/user_guide/custom_renderers.rst ================================================ .. 
_customizing-renderers: Customizing Renderers ===================== A renderer, as introduced in :ref:`renderers`, is a function that accepts a Vega-Lite or Vega visualization specification as a Python ``dict``, and returns a Python ``dict`` in Jupyter's `MIME Bundle format `_. This dictionary will be returned by a chart's ``_repr_mimebundle_`` method. The keys of the MIME bundle should be MIME types (such as ``image/png``) and the values should be the data for that MIME type (text, base64 encoded binary or JSON). Altair's default ``html`` renderer returns a cross-platform HTML representation using the ``"text/html"`` mimetype; schematically it looks like this:: def default_renderer(spec): bundle = {'text/html': generate_html(spec)} metadata = {} return bundle, metadata If a renderer needs to do custom display logic that doesn't use the frontend's display system, it can also return an empty MIME bundle dict:: def empty_bundle_renderer(spec): # Custom display logic that uses the spec ... # Return empty MIME bundle return {} As a simple example of a custom renderer, imagine we would like to add a ``plaintext`` renderer that renders a chart description in plain text. We could do it this way:: def plaintext_mimetype(spec): return {'text/plain': "description: " + spec.get('description', 'none')} alt.renderers.register('plaintext', plaintext_mimetype) The ``alt.renderers`` registry allows the user to define and enable new renderers. Now you can enable this mimetype and then when your chart is displayed you will see this description:: alt.renderers.enable('plaintext') alt.Chart('data.txt').mark_point().encode( x='x:Q', y='y:Q' ).properties( description='This is a simple chart' ) .. code-block:: none description: This is a simple chart This is a simple example, but it shows you the flexibility of this approach.
If you have a frontend that recognizes ``_repr_mimebundle_`` as a means of obtaining a MIME type representation of a Python object, then you can define a function that will process the chart content in any way before returning any mimetype. ================================================ FILE: doc/user_guide/customization.rst ================================================ .. currentmodule:: altair .. _user-guide-customization: Customizing Visualizations ========================== Altair's goal is to automatically choose useful plot settings and configurations so that the user is free to think about the data rather than the mechanics of plotting. That said, once you have a useful visualization, you will often want to adjust certain aspects of it. This section of the documentation outlines some of the ways to make these adjustments. Global Config vs. Local Config vs. Encoding ------------------------------------------- There are often two or three different ways to specify the look of your plots depending on the situation. For example, suppose we are creating a scatter plot of the ``cars`` dataset: .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url alt.Chart(cars).mark_point().encode( x='Acceleration:Q', y='Horsepower:Q' ) Suppose you wish to change the color of the points to red, and the opacity of the points to 20%. There are three possible approaches to these: 1. “Global Config” acts on an entire chart object 2. “Local Config” acts on one mark of the chart 3. “Encoding” channels can also be used to set some chart properties Global Config ~~~~~~~~~~~~~ First, every chart type has a ``"config"`` property at the top level that acts as a sort of theme for the whole chart and all of its sub-charts. Here you can specify things like axes properties, mark properties, selection properties, and more. Altair allows you to access these through the ``configure_*`` methods of the chart. 
Here we will use the :meth:`~Chart.configure_mark` property: .. altair-plot:: alt.Chart(cars).mark_point().encode( x='Acceleration:Q', y='Horsepower:Q' ).configure_mark( opacity=0.2, color='red' ) There are a couple things to be aware of when using this kind of global configuration: 1. By design configurations will affect *every mark* used within the chart 2. The global configuration is only permissible at the top-level; so, for example, if you tried to layer the above chart with another, it would result in an error. For a full discussion of global configuration options, see :ref:`user-guide-configuration`. Local Config ~~~~~~~~~~~~ If you would like to configure the look of the mark locally, such that the setting only affects the particular chart property you reference, this can be done via a local configuration setting. In the case of mark properties, the best approach is to set the property as an argument to the ``mark_*`` method. Here we will use :meth:`~Chart.mark_point`: .. altair-plot:: alt.Chart(cars).mark_point(opacity=0.2, color='red').encode( x='Acceleration:Q', y='Horsepower:Q' ) Unlike when using the global configuration, here it is possible to use the resulting chart as a layer or facet in a compound chart. Local config settings like this one will always override global settings. Encoding ~~~~~~~~ Finally, it is possible to set chart properties via the encoding channel (see :ref:`user-guide-encoding`). Rather than mapping a property to a data column, you can map a property directly to a value using the :func:`value` function: .. altair-plot:: alt.Chart(cars).mark_point().encode( x='Acceleration:Q', y='Horsepower:Q', opacity=alt.value(0.2), color=alt.value('red') ) Note that only a limited set of mark properties can be bound to encodings, so for some (e.g. ``fillOpacity``, ``strokeOpacity``, etc.) the encoding approach is not available. Encoding settings will always override local or global configuration settings. Which to Use? 
~~~~~~~~~~~~~ The precedence order for the three approaches is (from lowest to highest) *global config*, *local config*, *encoding*. That is, if a chart property is set both globally and locally, the local setting will win-out. If a property is set both via a configuration and an encoding, the encoding will win-out. In most usage, we recommend always using the highest-precedence means of setting properties; i.e. an encoding, or a local configuration for properties that are not tied to an encoding. Global configurations should be reserved for creating themes that are applied just before the chart is rendered. Adjusting the Title ------------------- By default an Altair chart does not have a title, as seen in this example. .. altair-plot:: import altair as alt from altair.datasets import data iowa = data.iowa_electricity.url alt.Chart(iowa).mark_area().encode( x="year:T", y=alt.Y("net_generation:Q").stack("normalize"), color="source:N" ) You can add a simple title by passing the ``title`` keyword argument with the data. .. altair-plot:: alt.Chart(iowa, title="Iowa's green energy boom").mark_area().encode( x="year:T", y=alt.Y("net_generation:Q").stack("normalize"), color="source:N" ) It is also possible to add a subtitle by passing in an ``alt.Title`` object. .. altair-plot:: alt.Chart( iowa, title=alt.Title( "Iowa's green energy boom", subtitle="A growing share of the state's energy has come from renewable sources" ) ).mark_area().encode( x="year:T", y=alt.Y("net_generation:Q").stack("normalize"), color="source:N" ) The subtitle can run to two lines by passing a list where each list item is a line (if you don't want to create this list manually as in the example below, you can use the ``wrap`` function from the `textwrap library `_ to split a string into a list of substrings of a certain length). .. 
altair-plot:: alt.Chart( iowa, title=alt.Title( "Iowa's green energy boom", subtitle=["A growing share of the state's energy", "has come from renewable sources"] ) ).mark_area().encode( x="year:T", y=alt.Y("net_generation:Q").stack("normalize"), color="source:N" ) The ``Title`` object can also configure a number of other attributes, e.g., to ``anchor`` it to the ``'start'`` (left) of the chart, and to ``orient`` it at the ``'bottom'`` of the chart (see :ref:`user-guide-configuration` for more options). .. altair-plot:: alt.Chart( iowa, title=alt.Title( "Iowa's green energy boom", subtitle="A growing share of the state's energy has come from renewable sources", anchor='start', orient='bottom', offset=20 ) ).mark_area().encode( x="year:T", y=alt.Y("net_generation:Q").stack("normalize"), color="source:N" ) In the chart above, you can see that the title is positioned all the way to the left, so that it lines up with the label on the y-axis. You can align the title to the axis line instead by setting the reference ``frame`` for the anchor position to be relative to the ``'group'`` (i.e. the data portion of the chart, excluding labels and titles). .. altair-plot:: alt.Chart( iowa, title=alt.Title( "Iowa's green energy boom", subtitle=["A growing share of the state's energy has come from", "renewable sources"], anchor='start', frame='group', orient='bottom', offset=20 ) ).mark_area().encode( x="year:T", y=alt.Y("net_generation:Q").stack("normalize"), color="source:N" ) Adjusting Axis Limits --------------------- The default axis limit used by Altair is dependent on the type of the data. To fine-tune the axis limits beyond these defaults, you can use the :meth:`scale` method of the axis encodings. For example, consider the following plot: .. 
altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url alt.Chart(cars).mark_point().encode( x='Acceleration:Q', y='Horsepower:Q' ) Altair inherits from Vega-Lite the convention of always including the zero-point in quantitative axes; if you would like to turn this off, you can add the :meth:`scale` method to the :class:`X` encoding that specifies ``zero=False``: .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Acceleration:Q').scale(zero=False), y='Horsepower:Q' ) To specify exact axis limits, you can use the ``domain`` property of the scale: .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Acceleration:Q').scale(domain=(5, 20)), y='Horsepower:Q' ) The problem is that the data still exists beyond the scale, and we need to tell Altair what to do with this data. One option is to "clip" the data by setting the ``"clip"`` property of the mark to True: .. altair-plot:: alt.Chart(cars).mark_point(clip=True).encode( alt.X('Acceleration:Q').scale(domain=(5, 20)), y='Horsepower:Q' ) Another option is to "clamp" the data; that is, to move points beyond the limit to the edge of the domain: .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Acceleration:Q').scale(domain=(5, 20), clamp=True), y='Horsepower:Q' ).interactive() For interactive charts like the one above, the clamping happens dynamically, which can be useful for keeping in mind outliers as you pan and zoom on the chart. Adjusting Axis Labels --------------------- Altair also gives you tools to easily configure the appearance of axis labels. For example consider this plot: .. altair-plot:: import pandas as pd df = pd.DataFrame( {'x': [0.03, 0.04, 0.05, 0.12, 0.07, 0.15], 'y': [10, 35, 39, 50, 24, 35] }) alt.Chart(df).mark_circle().encode( x='x', y='y' ) To fine-tune the formatting of the tick labels and to add a custom title to each axis, we can pass to the :class:`X` and :class:`Y` encoding a custom axis definition within the :meth:`axis` method. 
Here is an example of formatting the x labels as a percentage, and the y labels as a dollar value: .. altair-plot:: alt.Chart(df).mark_circle().encode( alt.X('x').axis(format='%').title('percentage'), alt.Y('y').axis(format='$').title('dollar amount') ) Axis labels can be easily removed: .. altair-plot:: alt.Chart(df).mark_circle().encode( alt.X('x').axis(labels=False), alt.Y('y').axis(labels=False) ) Axis titles can also be rotated: .. altair-plot:: alt.Chart(df).mark_circle().encode( alt.X('x').axis(title="x"), alt.Y('y').axis( title="Y Axis Title", titleAngle=0, titleAlign="left", titleY=-2, titleX=0, ) ) Additional formatting codes are available; for a listing of these see the `d3 Format Code Documentation `_. Adjusting the Legend -------------------- A legend is added to the chart automatically when the ``color``, ``shape`` or ``size`` arguments are passed to the :func:`encode` function. In this example we'll use ``color``. .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) In this case, the legend can be customized by introducing the :class:`Color` class and taking advantage of its :meth:`legend` method. The ``shape`` and ``size`` arguments have their own corresponding classes. The legend option on all of them expects a :class:`Legend` object as its input, which accepts arguments to customize many aspects of its appearance. One example is to move the legend to another position with the ``orient`` argument. .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.Color('Origin:N').legend(orient="left") ) Another thing you can do is set a ``title``; in this case we can use the :meth:`title` method directly as a shortcut or specify the ``title`` parameter inside the :meth:`legend` method: ..
altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.Color('Origin:N').title("Origin") ) You can remove the legend entirely by submitting a null value. .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.Color('Origin:N').legend(None), ) Removing the Chart Border ------------------------- Basic Altair charts are drawn with both a grid and an outside border. To create a chart with no border, you will need to remove them both. As an example, let's start with a simple scatter plot. .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) First remove the grid using the :meth:`configure_axis` method. .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ).configure_axis( grid=False ) You'll note that while the inside rules are gone, the outside border remains. Hide it by setting ``stroke=None`` inside :meth:`configure_view` (``strokeWidth=0`` and ``strokeOpacity=0`` also work): .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ).configure_axis( grid=False ).configure_view( stroke=None ) It is also possible to completely remove all borders and axes by combining the above option with setting ``axis`` to ``None`` during encoding. ..
altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( alt.X('Horsepower:Q').axis(None), alt.Y('Miles_per_Gallon:Q').axis(None), color='Origin:N' ).configure_axis( grid=False ).configure_view( stroke=None ) Customizing Colors ------------------ As discussed in :ref:`type-legend-scale`, Altair chooses a suitable default color scheme based on the type of the data that the color encodes. These defaults can be customized using the :meth:`scale` method of the :class:`Color` class. Color Schemes ~~~~~~~~~~~~~ Altair includes a set of named color schemes for both categorical and sequential data, defined by the vega project; see the `Vega documentation `_ for a full gallery of available color schemes. These schemes can be passed to the ``scheme`` argument of the :meth:`scale` method: .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color=alt.Color('Acceleration').scale(scheme="lightgreyred") ) The color scheme we used above highlights points on one end of the scale, while keeping the rest muted. If we want to highlight the lower ``Acceleration`` data to red color instead, we can use the ``reverse`` parameter to reverse the color scheme: .. altair-plot:: alt.Chart(cars).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color=alt.Color('Acceleration').scale(scheme="lightgreyred", reverse=True) ) Color Domain and Range ~~~~~~~~~~~~~~~~~~~~~~ To create custom color scales, we can use the ``domain`` and ``range`` parameters of the ``scale`` method for the values and colors, respectively. This works both for continuous scales, where it can help highlight specific ranges of values: ..
altair-plot:: domain = [5, 8, 10, 12, 25] range_ = ['#9cc8e2', '#9cc8e2', 'red', '#5ba3cf', '#125ca4'] alt.Chart(cars).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color=alt.Color('Acceleration').scale(domain=domain, range=range_) ) And for discrete scales: .. altair-plot:: domain = ['Europe', "Japan", "USA"] range_ = ['seagreen', 'firebrick', 'rebeccapurple'] alt.Chart(cars).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color=alt.Color('Origin').scale(domain=domain, range=range_) ) Raw Color Values ~~~~~~~~~~~~~~~~ The ``scale`` is what maps the raw input values into an appropriate color encoding for displaying the data. If your data entries consist of raw color names or codes, you can set ``scale(None)`` to use those colors directly: .. altair-plot:: import pandas as pd import altair as alt data = pd.DataFrame({ 'x': range(6), 'color': ['red', 'steelblue', 'chartreuse', '#F4D03F', '#D35400', '#7D3C98'] }) alt.Chart(data).mark_point( filled=True, size=100 ).encode( x='x', color=alt.Color('color').scale(None) ) Adjusting the Width of Bar Marks -------------------------------- The width of the bars in a bar plot is controlled through the ``size`` property in the :meth:`~Chart.mark_bar()`: .. altair-plot:: import altair as alt import pandas as pd data = pd.DataFrame({'name': ['a', 'b'], 'value': [4, 10]}) alt.Chart(data).mark_bar(size=10).encode( x='name:O', y='value:Q' ) But since ``mark_bar(size=10)`` only controls the width of the bars, it might become possible that the width of the chart is not adjusted accordingly: .. altair-plot:: alt.Chart(data).mark_bar(size=30).encode( x='name:O', y='value:Q' ) Therefore, it is often preferred to set the width of the entire chart relative to the number of distinct categories using :class:`Step`, which you can see an example of a few charts down. ..
_customization-chart-size: Adjusting Chart Size -------------------- The size of charts can be adjusted using the ``width`` and ``height`` properties. For example: .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_bar().encode( x='Origin', y='count()' ).properties( width=200, height=150 ) Note that in the case of faceted or other compound charts, this width and height applies to the subchart rather than to the overall chart: .. altair-plot:: alt.Chart(cars).mark_bar().encode( x='Origin', y='count()', column='Cylinders:Q' ).properties( width=100, height=100 ).resolve_scale( x='independent' ) To change the chart size relative to the number of distinct categories, you can use the :class:`Step` class to specify the width/height for each category rather than for the entire chart: .. altair-plot:: alt.Chart(cars).mark_bar().encode( x='Origin', y='count()', column='Cylinders:Q' ).properties( width=alt.Step(35), height=100 ).resolve_scale( x='independent' ) If you want your chart size to respond to the width of the HTML page or container in which it is rendered, you can set ``width`` or ``height`` to the string ``"container"``: .. altair-plot:: :div_class_: full-width-plot alt.Chart(cars).mark_bar().encode( x='Origin', y='count()', ).properties( width='container', height=200 ) Note that this will only scale with the container if its parent element has a size determined outside the chart itself; for example, the container may be a ``<div>``
element that has style ``width: 100%; height: 300px``. .. _chart-themes: Chart Themes ------------ .. note:: This material was changed considerably with the release of Altair ``5.5.0``. Altair makes available a theme registry that lets users apply chart configurations globally within any Python session. The :mod:`altair.theme` module provides :ref:`helper functions ` to interact with the registry. Each theme in the registry is a function which defines a specification dictionary that will be added to every created chart. For example, the default theme configures the default size of a single chart: >>> import altair as alt >>> default = alt.theme.get() >>> default() {'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}} You can see that any chart you create will have this theme applied, and these configurations added to its specification: .. altair-plot:: :output: repr import altair as alt from altair.datasets import data chart = alt.Chart(data.cars.url).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q' ) chart.to_dict() The rendered chart will then reflect these configurations: .. altair-plot:: chart Changing the Theme ~~~~~~~~~~~~~~~~~~ If you would like to enable any other theme for the length of your Python session, you can call :func:`altair.theme.enable`. For example, Altair includes a theme in which the chart background is opaque rather than transparent: .. altair-plot:: :output: repr alt.theme.enable('opaque') chart.to_dict() .. altair-plot:: chart Notice that the background color of the chart is now set to white. If you would like no theme applied to your chart, you can use the theme named ``'none'``: .. altair-plot:: :output: repr alt.theme.enable('none') chart.to_dict() .. altair-plot:: chart Because the view configuration is not set, the chart is smaller than the default rendering. If you would like to use any theme just for a single chart, you can use the ``with`` statement to enable a temporary theme: ..
altair-plot:: :output: none with alt.theme.enable('default'): spec = chart.to_json() .. note:: The above requires that a conversion/saving operation occurs during the ``with`` block, such as :meth:`~Chart.to_dict`, :meth:`~Chart.to_json`, :meth:`~Chart.save`. See https://github.com/vega/altair/issues/3586 Built-in Themes ~~~~~~~~~~~~~~~ Currently Altair does not offer many built-in themes, but we plan to add more options in the future. You can get a feel for the themes inherited from `Vega Themes`_ via *Vega-Altair Theme Test* below: .. altair-theme:: tests.altair_theme_test.alt_theme_test :fold: :summary: Show Vega-Altair Theme Test Defining a Custom Theme ~~~~~~~~~~~~~~~~~~~~~~~ A theme is simply a function that returns a dictionary of default values to be added to the chart specification at rendering time. Using :func:`altair.theme.register`, we can both register and enable a theme at the site of the function definition. For example, here we define a theme in which all marks are drawn with black fill unless otherwise specified: .. altair-plot:: import altair as alt from altair.datasets import data # define, register and enable theme @alt.theme.register("black_marks", enable=True) def black_marks() -> alt.theme.ThemeConfig: return { "config": { "view": {"continuousWidth": 300, "continuousHeight": 300}, "mark": {"color": "black", "fill": "black"}, } } # draw the chart cars = data.cars.url alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q' ) If you want to restore the default theme, use: .. altair-plot:: :output: none alt.theme.enable('default') When experimenting with your theme, you can use the code below to see how it translates across a range of charts/marks: .. altair-code-ref:: tests.altair_theme_test.alt_theme_test :fold: :summary: Show Vega-Altair Theme Test code For more ideas on themes, see the `Vega Themes`_ repository.
Localization ------------ The preferred format of numbers, dates, and currencies varies by language and locale. Vega-Altair takes advantage of `D3's localization support`_ to make it easy to configure the locale for your chart using the global ``alt.renderers.set_embed_options`` function. Here ``format_locale`` and ``time_format_locale`` may either be D3 format dictionaries, or strings with the names of pre-defined locales. For example, here we use the Italian locale (named ``it-IT``) for both currencies and dates: .. altair-plot:: :output: none import altair as alt from altair.datasets import data alt.renderers.set_embed_options(format_locale="it-IT", time_format_locale="it-IT") source = data.stocks.url chart = alt.Chart(source).mark_area().transform_filter('year(datum.date) == 2009').encode( x='date:T', y=alt.Y('price:Q', axis=alt.Axis(format="$.0f")), color='symbol:N' ) chart .. image:: /_static/stocks_it-IT.svg :alt: Area chart of stock prices using Italian locale See https://unpkg.com/d3-format/locale/ for a list of available format locale names, and see https://unpkg.com/d3-time-format/locale/ for a list of available time format locales. The configured localization settings persist upon saving. .. note:: The globally defined properties, ``format_locale`` and ``time_format_locale``, apply to the full session and are not specific to individual charts. To revert localization settings to the default U.S. English locale, use the following command:: alt.renderers.set_embed_options(format_locale="en-US", time_format_locale="en-US") .. _Vega Themes: https://github.com/vega/vega-themes/ .. _`D3's localization support`: https://d3-wiki.readthedocs.io/zh-cn/master/Localization/ ================================================ FILE: doc/user_guide/data.rst ================================================ .. currentmodule:: altair .. 
_user-guide-data: Specifying Data --------------- The basic data model used by Altair is tabular data, similar to a spreadsheet or database table. Individual datasets are assumed to contain a collection of records (rows), which may contain any number of named data fields (columns). Each top-level chart object (i.e. :class:`Chart`, :class:`LayerChart`, :class:`VConcatChart`, :class:`HConcatChart`, :class:`RepeatChart`, and :class:`FacetChart`) accepts a dataset as its first argument. There are many different ways of specifying a dataset: - as a `pandas DataFrame `_ - as a DataFrame that supports the DataFrame Interchange Protocol (contains a ``__dataframe__`` attribute), e.g. polars and pyarrow. This is experimental. - as a :class:`Data` or related object (i.e. :class:`UrlData`, :class:`InlineData`, :class:`NamedData`) - as a url string pointing to a ``json`` or ``csv`` formatted text file - as a `geopandas GeoDataFrame `_, `Shapely Geometries `_, `GeoJSON Objects `_ or other objects that support the ``__geo_interface__`` - as a generated dataset such as numerical sequences or geographic reference elements When data is specified as a pandas DataFrame, Altair uses the data type information provided by pandas to automatically determine the data types required in the encoding. For example, here we specify data via a pandas DataFrame and Altair automatically detects that the x-column should be visualized on a categorical (nominal) scale and that the y-column should be visualized on a quantitative scale: .. altair-plot:: import altair as alt import pandas as pd data = pd.DataFrame({'x': ['A', 'B', 'C', 'D', 'E'], 'y': [5, 3, 6, 7, 2]}) alt.Chart(data).mark_bar().encode( x='x', y='y', ) By comparison, all other ways of specifying the data (including non-pandas DataFrames) require encoding types to be declared explicitly. Here we create the same chart as above using a :class:`Data` object, with the data specified as a JSON-style list of records: ..
altair-plot:: import altair as alt data = alt.Data(values=[{'x': 'A', 'y': 5}, {'x': 'B', 'y': 3}, {'x': 'C', 'y': 6}, {'x': 'D', 'y': 7}, {'x': 'E', 'y': 2}]) alt.Chart(data).mark_bar().encode( x='x:N', # specify nominal data y='y:Q', # specify quantitative data ) Notice the extra markup required in the encoding; because Altair cannot infer the types within a :class:`Data` object, we must specify them manually (here we use :ref:`shorthand-description` to specify *nominal* (``N``) for ``x`` and *quantitative* (``Q``) for ``y``; see :ref:`encoding-data-types`). Similarly, we must also specify the data type when referencing data by URL: .. altair-plot:: import altair as alt from altair.datasets import data url = data.cars.url alt.Chart(url).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q' ) Encodings and their associated types are further discussed in :ref:`user-guide-encoding`. Below we go into more detail about the different ways of specifying data in an Altair chart. pandas DataFrame ~~~~~~~~~~~~~~~~ .. _data-in-index: Including Index Data ^^^^^^^^^^^^^^^^^^^^ By design Altair only accesses dataframe columns, not dataframe indices. At times, relevant data appears in the index. For example: .. altair-plot:: :output: repr import numpy as np rand = np.random.RandomState(0) data = pd.DataFrame({'value': rand.randn(100).cumsum()}, index=pd.date_range('2018', freq='D', periods=100)) data.head() If you would like the index to be available to the chart, you can explicitly turn it into a column using the ``reset_index()`` method of pandas dataframes: .. altair-plot:: alt.Chart(data.reset_index()).mark_line().encode( x='index:T', y='value:Q' ) If the index object does not have a ``name`` attribute set, the resulting column will be called ``"index"``. More information is available in the `pandas documentation `_. .. _data-long-vs-wide: Long-form vs. 
Wide-form Data ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There are two common conventions for storing data in a dataframe, sometimes called *long-form* and *wide-form*. Both are sensible patterns for storing data in a tabular format; briefly, the difference is this: - **wide-form data** has one row per *independent variable*, with metadata recorded in the *row and column labels*. - **long-form data** has one row per *observation*, with metadata recorded within the table as *values*. Altair's grammar works best with **long-form** data, in which each row corresponds to a single observation along with its metadata. A concrete example will help in making this distinction more clear. Consider a dataset consisting of stock prices of several companies over time. The wide-form version of the data might be arranged as follows: .. altair-plot:: :output: repr :chart-var-name: wide_form wide_form = pd.DataFrame({'Date': ['2007-10-01', '2007-11-01', '2007-12-01'], 'AAPL': [189.95, 182.22, 198.08], 'AMZN': [89.15, 90.56, 92.64], 'GOOG': [707.00, 693.00, 691.48]}) print(wide_form) Notice that each row corresponds to a single time-stamp (here time is the independent variable), while metadata for each observation (i.e. company name) is stored within the column labels. The long-form version of the same data might look like this: .. altair-plot:: :output: repr :chart-var-name: long_form long_form = pd.DataFrame({'Date': ['2007-10-01', '2007-11-01', '2007-12-01', '2007-10-01', '2007-11-01', '2007-12-01', '2007-10-01', '2007-11-01', '2007-12-01'], 'company': ['AAPL', 'AAPL', 'AAPL', 'AMZN', 'AMZN', 'AMZN', 'GOOG', 'GOOG', 'GOOG'], 'price': [189.95, 182.22, 198.08, 89.15, 90.56, 92.64, 707.00, 693.00, 691.48]}) print(long_form) Notice here that each row contains a single observation (i.e. price), along with the metadata for this observation (the date and company name). Importantly, the column and index labels no longer contain any useful metadata. 
As mentioned above, Altair works best with this long-form data, because relevant data and metadata are stored within the table itself, rather than within the labels of rows and columns: .. altair-plot:: alt.Chart(long_form).mark_line().encode( x='Date:T', y='price:Q', color='company:N' ) Wide-form data can be similarly visualized using e.g. layering (see :ref:`layer-chart`), but it is far less convenient within Altair's grammar. If you would like to convert data from wide-form to long-form, there are two possible approaches: it can be done as a preprocessing step using pandas, or as a transform step within the chart itself. We will detail the two approaches below. .. _data-converting-long-form: Converting with pandas """""""""""""""""""""" This sort of data manipulation can be done as a preprocessing step using pandas_, and is discussed in detail in the `Reshaping and Pivot Tables`_ section of the pandas documentation. For converting wide-form data to the long-form data used by Altair, the ``melt`` method of dataframes can be used. The first argument to ``melt`` is the column or list of columns to treat as index variables; the remaining columns will be combined into an indicator variable and a value variable whose names can be optionally specified: .. altair-plot:: :output: repr wide_form.melt('Date', var_name='company', value_name='price') For more information on the ``melt`` method, see the `pandas melt documentation`_. In case you would like to undo this operation and convert from long-form back to wide-form, the ``pivot`` method of dataframes is useful. .. altair-plot:: :output: repr long_form.pivot(index='Date', columns='company', values='price').reset_index() For more information on the ``pivot`` method, see the `pandas pivot documentation`_.
Converting with Fold Transform """""""""""""""""""""""""""""" If you would like to avoid data preprocessing, you can reshape your data using Altair's Fold Transform (see :ref:`user-guide-fold-transform` for a full discussion). With it, the above chart can be reproduced as follows: .. altair-plot:: alt.Chart(wide_form).transform_fold( ['AAPL', 'AMZN', 'GOOG'], as_=['company', 'price'] ).mark_line().encode( x='Date:T', y='price:Q', color='company:N' ) Notice that unlike the pandas ``melt`` function we must explicitly specify the columns to be folded. The ``as_`` argument is optional, with the default being ``["key", "value"]``. .. _data-generated: Generated Data ~~~~~~~~~~~~~~ At times it is convenient to not use an external data source, but rather generate data for display within the chart specification itself. The benefit is that the chart specification can be made much smaller for generated data than for embedded data. Sequence Generator ^^^^^^^^^^^^^^^^^^ Here is an example of using the :func:`sequence` function to generate a sequence of *x* data, along with a :ref:`user-guide-calculate-transform` to compute *y* data. .. altair-plot:: import altair as alt # Note that the following generator is functionally similar to # data = pd.DataFrame({'x': np.arange(0, 10, 0.1)}) data = alt.sequence(0, 10, 0.1, as_='x') alt.Chart(data).transform_calculate( y='sin(datum.x)' ).mark_line().encode( x='x:Q', y='y:Q', ) Graticule Generator ^^^^^^^^^^^^^^^^^^^ Another type of data that is convenient to generate in the chart itself is the latitude/longitude lines on a geographic visualization, known as a graticule. These can be created using Altair's :func:`graticule` generator function. Here is a simple example: .. 
altair-plot:: import altair as alt data = alt.graticule(step=[15, 15]) alt.Chart(data).mark_geoshape(stroke='black').project( 'orthographic', rotate=[0, -45, 0] ) Sphere Generator ^^^^^^^^^^^^^^^^ Finally when visualizing the globe a sphere can be used as a background layer within a map to represent the extent of the Earth. This sphere data can be created using Altair's :func:`sphere` generator function. Here is an example: .. altair-plot:: import altair as alt sphere_data = alt.sphere() grat_data = alt.graticule(step=[15, 15]) background = alt.Chart(sphere_data).mark_geoshape(fill='aliceblue') lines = alt.Chart(grat_data).mark_geoshape(stroke='lightgrey') alt.layer(background, lines).project('naturalEarth1') .. _pandas: http://pandas.pydata.org/ .. _pandas pivot documentation: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pivot.html .. _pandas melt documentation: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.melt.html#pandas.DataFrame.melt .. _Reshaping and Pivot Tables: https://pandas.pydata.org/pandas-docs/stable/reshaping.html .. _spatial-data: Spatial Data ~~~~~~~~~~~~ In this section, we explain different methods for reading spatial data into Altair. To learn more about how to work with this data after you have read it in, please see the :ref:`user-guide-geoshape-marks` mark page. .. _spatial-data-gdf: GeoPandas GeoDataFrame ^^^^^^^^^^^^^^^^^^^^^^ It is convenient to use GeoPandas as the source for your spatial data. GeoPandas can read many types of spatial data and Altair works well with GeoDataFrames. Here we define four polygon geometries into a GeoDataFrame and visualize these using the ``mark_geoshape``. .. 
altair-plot:: :output: repr from shapely import geometry import geopandas as gpd import altair as alt data_geoms = [ {"color": "#F3C14F", "geometry": geometry.Polygon([[1.45, 3.75], [1.45, 0], [0, 0], [1.45, 3.75]])}, {"color": "#4098D7", "geometry": geometry.Polygon([[1.45, 0], [1.45, 3.75], [2.57, 3.75], [2.57, 0], [2.33, 0], [1.45, 0]])}, {"color": "#66B4E2", "geometry": geometry.Polygon([[2.33, 0], [2.33, 2.5], [3.47, 2.5], [3.47, 0], [3.2, 0], [2.57, 0], [2.33, 0]])}, {"color": "#A9CDE0", "geometry": geometry.Polygon([[3.2, 0], [3.2, 1.25], [4.32, 1.25], [4.32, 0], [3.47, 0], [3.2, 0]])}, ] gdf_geoms = gpd.GeoDataFrame(data_geoms) gdf_geoms Since the spatial data in our example is not geographic, we use ``project`` configuration ``type="identity", reflectY=True`` to draw the geometries without applying a geographic projection. By using ``alt.Color(...).scale(None)`` we disable the automatic color assignment in Altair and instead directly use the provided Hex color codes. .. altair-plot:: alt.Chart(gdf_geoms, title="Vega-Altair").mark_geoshape().encode( alt.Color("color:N").scale(None) ).project(type="identity", reflectY=True) .. _spatial-data-inline-geojson: Inline GeoJSON Object ^^^^^^^^^^^^^^^^^^^^^ If your source data is a GeoJSON file and you do not want to load it into a GeoPandas GeoDataFrame you can provide it as a dictionary to the Altair ``Data`` class. A GeoJSON file normally consists of a ``FeatureCollection`` with a list of ``features`` where the information for each geometry is specified within a ``properties`` dictionary. In the following example a GeoJSON-like data object is specified into a ``Data`` class using the ``property`` value of the ``key`` that contain the nested list (here named ``features``). .. 
altair-plot:: :output: repr obj_geojson = { "type": "FeatureCollection", "features":[ {"type": "Feature", "properties": {"location": "left"}, "geometry": {"type": "Polygon", "coordinates": [[[1.45, 3.75], [1.45, 0], [0, 0], [1.45, 3.75]]]}}, {"type": "Feature", "properties": {"location": "middle-left"}, "geometry": {"type": "Polygon", "coordinates": [[[1.45, 0], [1.45, 3.75], [2.57, 3.75], [2.57, 0], [2.33, 0], [1.45, 0]]]}}, {"type": "Feature", "properties": {"location": "middle-right"}, "geometry": {"type": "Polygon", "coordinates": [[[2.33, 0], [2.33, 2.5], [3.47, 2.5], [3.47, 0], [3.2, 0], [2.57, 0], [2.33, 0]]]}}, {"type": "Feature", "properties": {"location": "right"}, "geometry": {"type": "Polygon", "coordinates": [[[3.2, 0], [3.2, 1.25], [4.32, 1.25], [4.32, 0], [3.47, 0], [3.2, 0]]]}} ] } data_obj_geojson = alt.Data(values=obj_geojson, format=alt.DataFormat(property="features")) data_obj_geojson The label for each object's location is stored within the ``properties`` dictionary. To access these values you can specify a nested variable name (here ``properties.location``) within the color channel encoding. Here we change the coloring encoding to be based on this location label, and apply a ``magma`` color scheme instead of the default one. The ``:O`` suffix indicates that we want Altair to treat these values as ordinal structured data; you can read more about it in the :ref:`encoding-data-types` page. .. altair-plot:: alt.Chart(data_obj_geojson, title="Vega-Altair - ordinal scale").mark_geoshape().encode( alt.Color("properties.location:O").scale(scheme='magma') ).project(type="identity", reflectY=True) .. _spatial-data-remote-geojson: GeoJSON File by URL ^^^^^^^^^^^^^^^^^^^ Altair can load GeoJSON resources directly from a web URL. Here we use an example from geojson.xyz.
As is explained in :ref:`spatial-data-inline-geojson`, we specify ``features`` as the value for the ``property`` parameter in the ``alt.DataFormat()`` object and prepend the attribute we want to plot (``continent``) with the name of the nested dictionary where the information of each geometry is stored (``properties``). .. altair-plot:: :output: repr url_geojson = "https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_admin_0_countries.geojson" data_url_geojson = alt.Data(url=url_geojson, format=alt.DataFormat(property="features")) data_url_geojson .. altair-plot:: alt.Chart(data_url_geojson).mark_geoshape().encode(color='properties.continent:N') .. _spatial-data-inline-topojson: Inline TopoJSON Object ^^^^^^^^^^^^^^^^^^^^^^ TopoJSON is an extension of GeoJSON, where the geometry of the features are referred to from a top-level object named arcs. Each shared arc is only stored once to reduce the size of the data. A TopoJSON file object can contain multiple objects (eg. boundary border and province border). When defining a TopoJSON object for Altair we specify the ``topojson`` data format type and the name of the object we like to visualize using the ``feature`` parameter. Here the name of this object key is ``MY_DATA``, but this differs in each dataset. .. altair-plot:: :output: repr obj_topojson = { "arcs": [ [[1.0, 1.0], [0.0, 1.0], [0.0, 0.0], [1.0, 0.0]], [[1.0, 0.0], [2.0, 0.0], [2.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 0.0]], ], "objects": { "MY_DATA": { "geometries": [ {"arcs": [[-3, 0]], "properties": {"name": "abc"}, "type": "Polygon"}, {"arcs": [[1, 2]], "properties": {"name": "def"}, "type": "Polygon"}, ], "type": "GeometryCollection", } }, "type": "Topology", } data_obj_topojson = alt.Data( values=obj_topojson, format=alt.DataFormat(feature="MY_DATA", type="topojson") ) data_obj_topojson .. altair-plot:: alt.Chart(data_obj_topojson).mark_geoshape( ).encode( color="properties.name:N" ).project( type='identity', reflectY=True ) .. 
_spatial-data-remote-topojson: TopoJSON File by URL ^^^^^^^^^^^^^^^^^^^^ Altair can load TopoJSON resources directly from a web URL. As explained in :ref:`spatial-data-inline-topojson`, we have to use the ``feature`` parameter to specify the object name (here ``boroughs``) and define the type of data as ``topojson`` in the ``alt.DataFormat()`` object. .. altair-plot:: :output: repr from altair.datasets import data url_topojson = data.london_boroughs.url data_url_topojson = alt.Data( url=url_topojson, format=alt.DataFormat(feature="boroughs", type="topojson") ) data_url_topojson Note: There also exists a shorthand to extract the objects from a topojson file if this file is accessible by URL: ``alt.topo_feature(url=url_topojson, feature="boroughs")`` We color encode the Boroughs by their names as they are stored as a unique identifier (``id``). We use a ``symbolLimit`` of 33 in two columns to display all entries in the legend and change the color scheme to have more distinct colors. We also add a tooltip which shows the name of the borough as we hover over it with the mouse. .. altair-plot:: alt.Chart(data_url_topojson, title="London-Boroughs").mark_geoshape( tooltip=True ).encode( alt.Color("id:N").scale(scheme='tableau20').legend(columns=2, symbolLimit=33) ) Similar to the ``feature`` option, there also exists the ``mesh`` parameter. This parameter extracts a named TopoJSON object set. Unlike the feature option, the corresponding geo data is returned as a single, unified mesh instance, not as individual GeoJSON features. Extracting a mesh is useful for more efficiently drawing borders or other geographic elements that you do not need to associate with specific regions such as individual countries, states or counties. Here below we draw the same Boroughs of London, but now as mesh only. Note: you have to explicitly define ``filled=False`` to draw (multi)lines without fill color. ..
altair-plot:: from altair.datasets import data url_topojson = data.london_boroughs.url data_url_topojson_mesh = alt.Data( url=url_topojson, format=alt.DataFormat(mesh="boroughs", type="topojson") ) alt.Chart(data_url_topojson_mesh, title="Border London-Boroughs").mark_geoshape( filled=False ) .. _spatial-data-nested-geojson: Nested GeoJSON Objects ^^^^^^^^^^^^^^^^^^^^^^ GeoJSON data can also be nested within another dataset. In this case it is possible to use the ``shape`` encoding channel in combination with the ``:G`` suffix to visualize the nested features as GeoJSON objects. In the following example the GeoJSON object are nested within ``geo`` in the list of dictionaries: .. altair-plot:: nested_features = [ {"color": "#F3C14F", "geo": {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[[1.45, 3.75], [1.45, 0], [0, 0], [1.45, 3.75]]]}}}, {"color": "#4098D7", "geo": {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[[1.45, 0], [1.45, 3.75], [2.57, 3.75], [2.57, 0], [2.33, 0], [1.45, 0]]]}}}, {"color": "#66B4E2", "geo": {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[[2.33, 0], [2.33, 2.5], [3.47, 2.5], [3.47, 0], [3.2, 0], [2.57, 0], [2.33, 0]]]}}}, {"color": "#A9CDE0", "geo": {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[[3.2, 0], [3.2, 1.25], [4.32, 1.25], [4.32, 0], [3.47, 0], [3.2, 0]]]}}}, ] data_nested_features = alt.Data(values=nested_features) alt.Chart(data_nested_features, title="Vega-Altair").mark_geoshape().encode( shape="geo:G", color=alt.Color("color:N").scale(None) ).project(type="identity", reflectY=True) .. _data-projections: Projections ^^^^^^^^^^^ For geographic data it is best to use the World Geodetic System 1984 as its geographic coordinate reference system with units in decimal degrees. Try to avoid putting projected data into Altair, but reproject your spatial data to EPSG:4326 first. If your data comes in a different projection (eg. 
with units in meters) and you don't have the option to reproject the data, try using the project configuration ``(type='identity', reflectY=True)``. It draws the geometries without applying a projection. .. _data-winding-order: Winding Order ^^^^^^^^^^^^^ LineString, Polygon and MultiPolygon geometries contain coordinates in an order: lines go in a certain direction, and polygon rings do too. The GeoJSON-like structure of the ``__geo_interface__`` recommends the right-hand rule winding order for Polygon and MultiPolygons. Meaning that the exterior rings should be counterclockwise and interior rings are clockwise. While it recommends the right-hand rule winding order, it does not reject geometries that do not use the right-hand rule. Altair does NOT follow the right-hand rule for geometries, but uses the left-hand rule. Meaning that exterior rings should be clockwise and interior rings should be counterclockwise. If you face a problem regarding winding order, try to force the left-hand rule on your data before usage in Altair using GeoPandas for example as such: .. code:: python from shapely.ops import orient gdf.geometry = gdf.geometry.apply(orient, args=(-1,)) .. toctree:: :maxdepth: 1 :caption: User Guide :hidden: self encodings/index marks/index transform/index interactions/index compound_charts scale_resolve times_and_dates customization configuration saving_charts .. toctree:: :maxdepth: 1 :caption: Advanced Usage :hidden: internals display_frontends custom_renderers data_transformers large_datasets ================================================ FILE: doc/user_guide/data_transformers.rst ================================================ .. _data-transformers: Data Transformers ================= Before a Vega-Lite or Vega specification can be passed to a renderer, it typically has to be transformed in a number of ways: * pandas Dataframe has to be sanitized and serialized to JSON.
* The rows of a Dataframe might need to be sampled or limited to a maximum number. * The Dataframe might be written to a ``.csv`` or ``.json`` file for performance reasons. These data transformations are managed by the data transformation API of Altair. .. note:: The data transformation API of Altair should not be confused with the ``transform`` API of Vega and Vega-Lite. A data transformer is a Python function that takes a Vega-Lite data ``dict`` or pandas ``DataFrame`` and returns a transformed version of either of these types:: from typing import Union Data = Union[dict, pd.DataFrame] def data_transformer(data: Data) -> Data: # Transform and return the data return transformed_data Dataset Consolidation ~~~~~~~~~~~~~~~~~~~~~ Datasets passed as pandas dataframes can be represented in the chart in two ways: - As literal dataset values in the ``data`` attribute at any level of the specification - As a named dataset in the ``datasets`` attribute of the top-level specification. The former is a bit simpler, but common patterns of usage in Altair can often lead to full datasets being listed multiple times in their entirety within a single specification. For this reason, Altair 2.2 and newer will by default move all directly-specified datasets into the top-level ``datasets`` entry, and reference them by a unique name determined from the hash of the data representation. The benefit of using a hash-based name is that even if the user specifies a dataset in multiple places when building the chart, the specification will only include one copy. This behavior can be modified by setting the ``consolidate_datasets`` attribute of the data transformer. For example, consider this simple layered chart: ..
altair-plot:: :chart-var-name: chart import altair as alt import pandas as pd df = pd.DataFrame({'x': range(5), 'y': [1, 3, 4, 3, 5]}) line = alt.Chart(df).mark_line().encode(x='x', y='y') points = alt.Chart(df).mark_point().encode(x='x', y='y') chart = line + points If we look at the resulting specification, we see that although the dataset was specified twice, only one copy of it is output in the spec: .. altair-plot:: :output: stdout from pprint import pprint pprint(chart.to_dict()) This consolidation of datasets is an extra bit of processing that is turned on by default in all renderers. If you would like to disable this dataset consolidation for any reason, you can do so by setting ``alt.data_transformers.consolidate_datasets = False``, or by using the ``enable()`` context manager to do it only temporarily: .. altair-plot:: :output: stdout with alt.data_transformers.enable(consolidate_datasets=False): pprint(chart.to_dict()) Notice that now the dataset is not specified within the top-level ``datasets`` attribute, but rather as values within the ``data`` attribute of each individual layer. This duplication of data is the reason that dataset consolidation is set to ``True`` by default. Built-in Data Transformers ~~~~~~~~~~~~~~~~~~~~~~~~~~ Altair includes a default set of data transformers with the following signatures. 
Raise a ``MaxRowsError`` if a Dataframe has more than ``max_rows`` rows:: limit_rows(data, max_rows=5000) Randomly sample a DataFrame (without replacement) before visualizing:: sample(data, n=None, frac=None) Convert a Dataframe to a separate ``.json`` file before visualization:: to_json(data, prefix='altair-data'): Convert a Dataframe to a separate ``.csv`` file before visualization:: to_csv(data, prefix='altair-data'): Convert a Dataframe to inline JSON values before visualization:: to_values(data): Piping ~~~~~~ Multiple data transformers can be piped together using ``pipe``:: from altair import limit_rows, to_values from toolz.curried import pipe pipe(data, limit_rows(10000), to_values) Managing Data Transformers ~~~~~~~~~~~~~~~~~~~~~~~~~~ Altair maintains a registry of data transformers, which includes a default data transformer that is automatically applied to all Dataframes before rendering. To see the registered transformers:: >>> import altair as alt >>> alt.data_transformers.names() ['default', 'json', 'csv'] The default data transformer is the following:: def default_data_transformer(data): return pipe(data, limit_rows, to_values) The ``json`` and ``csv`` data transformers will save a Dataframe to a temporary ``.json`` or ``.csv`` file before rendering. There are a number of performance advantages to these two data transformers: * The full dataset will not be saved in the notebook document. * The performance of the Vega-Lite/Vega JavaScript appears to be better for standalone JSON/CSV files than for inline values. There are disadvantages of the JSON/CSV data transformers: * The Dataframe will be exported to a temporary ``.json`` or ``.csv`` file that sits next to the notebook. * That notebook will not be able to re-render the visualization without that temporary file (or re-running the cell). 
In our experience, the performance improvement is significant enough that we recommend using the ``json`` data transformer for any large datasets:: alt.data_transformers.enable('json') We hope that others will write additional data transformers - imagine a transformer which saves the dataset to a JSON file on S3, which could be registered and enabled as:: alt.data_transformers.register('s3', lambda data: pipe(data, to_s3('mybucket'))) alt.data_transformers.enable('s3') Storing JSON Data in a Separate Directory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When creating many charts with ``alt.data_transformers.enable('json')`` the working directory can get a bit cluttered. To avoid this we can build a simple custom data transformer that stores all JSON files in a separate directory:: import os import altair as alt from toolz.curried import pipe def json_dir(data, data_dir='altairdata'): os.makedirs(data_dir, exist_ok=True) return pipe(data, alt.to_json(filename=data_dir + '/{prefix}-{hash}.{extension}') ) alt.data_transformers.register('json_dir', json_dir) alt.data_transformers.enable('json_dir', data_dir='mydata') After enabling this data transformer, the JSON files will be stored in what ``data_dir`` was set to when enabling the transformer or 'altairdata' by default. All we had to do was to prefix the ``filename`` argument of the ``alt.to_json`` function with our desired directory and make sure that the directory actually exists. ================================================ FILE: doc/user_guide/display_frontends.rst ================================================ .. _displaying-charts: Displaying Altair Charts ======================== Altair produces `Vega-Lite`_ visualizations, which require a Javascript frontend to display the charts. Because notebook environments combine a Python backend with a Javascript frontend, many users find them convenient for using Altair.
Altair charts work out-of-the-box on `Jupyter Notebook`_, `JupyterLab`_, `Zeppelin`_, and related notebook environments, so long as there is a web connection to load the required javascript libraries. Altair can also be used with various IDEs that are enabled to display Altair charts, and can be used offline in most platforms with an appropriate frontend extension enabled; details are below. .. _renderers: Altair's Renderer Framework --------------------------- Because different display systems have different requirements and constraints, Altair provides an API to switch between various *renderers* to tune Altair's chart representation. These can be chosen with the renderer registry in ``alt.renderers``. The most used built-in renderers are: ``alt.renderers.enable("html")`` *(the default)* Output an HTML representation of the chart. The HTML renderer works in JupyterLab_, `Jupyter Notebook`_, `Zeppelin`_, `VSCode-Python`_ and many related notebook frontends, as well as Jupyter ecosystem tools like nbviewer_ and nbconvert_ HTML output. It requires a web connection in order to load relevant Javascript libraries. ``alt.renderers.enable("mimetype")`` *(default prior to Altair 4.0):* Output a vega-lite specific mimetype that can be interpreted by appropriate frontend extensions to display charts. This also outputs a PNG representation of the plot, which is useful to view plots offline or on platforms that don't support rendering vegaspecs, such as GitHub. It works with newer versions of JupyterLab_, nteract_, and `VSCode-Python`_, but does not work with the `Jupyter Notebook`_, or with tools like nbviewer_ and nbconvert_. ``alt.renderers.enable("jupyter")`` *(added in version 5.3):* Output the chart using :ref:`user-guide-jupyterchart`. This renderer is compatible with environments that support third-party Jupyter Widgets including JupyterLab_, `Jupyter Notebook`_, `VSCode-Python`_, and `Colab`_. 
It requires a web connection in order to load relevant Javascript libraries. Note that, although this renderer uses ``JupyterChart``, it does not provide the ability to access value and selection params in Python. To do so, create a ``JupyterChart`` object explicitly following the instructions in the :ref:`user-guide-jupyterchart` documentation. ``alt.renderers.enable("jupyter", offline=True)`` *(added in version 5.3):* Same as the ``"jupyter"`` renderer above, but loads JavaScript dependencies from the ``vl-convert-python`` package (rather than from an online CDN) so that an internet connection is not required. ``alt.renderers.enable("browser")`` *(added in version 5.3):* Display charts in an external web browser. This renderer is particularly useful when using Vega-Altair in a local non-Jupyter environment, such as in `IPython`_ or `Spyder`_. See :ref:`display-browser` for more information. In addition, Altair includes the following renderers: - ``"default"``, ``"colab"``, ``"kaggle"``, ``"zeppelin"``: identical to ``"html"`` - ``"jupyterlab"``, ``"nteract"``: identical to ``"mimetype"`` - ``"png"``: renderer that renders and converts the chart to PNG, outputting it using the ``"image/png"`` MIME type. - ``"svg"``: renderer that renders and converts the chart to an SVG image, outputting it using the ``"image/svg+xml"`` MIME type. - ``"olli"``: renderer that uses `Olli`_ to generate accessible text structures for screen reader users. - ``"json"``: renderer that outputs the raw JSON chart specification, using the ``"application/json"`` MIME type. You can use ``alt.renderers.names()`` to return all registered renderers as a Python list. Other renderers can be installed by third-party packages via Python's entrypoints system or you can create your own, see :ref:`customizing-renderers`. .. 
_display-jupyterlab: Displaying in JupyterLab ------------------------ JupyterLab 1.0 and later will work with Altair's default renderer with a live web connection: no render enable step is required. Optionally, for offline rendering in JupyterLab, you can use the mimetype renderer:: # Optional in JupyterLab: requires an up-to-date vega labextension. alt.renderers.enable('mimetype') and ensure you have the proper version of the vega labextension installed; for Altair 4 this can be installed with: .. code-block:: bash $ jupyter labextension install @jupyterlab/vega5-extension In JupyterLab version 2.0 or newer, this extension is installed by default, though the version available in the JupyterLab release often takes a few months to catch up with new Altair releases. .. _display-notebook: Displaying in Jupyter Notebook ------------------------------ The classic Jupyter Notebook will work with Altair's default renderer with a live web connection: no render enable step is required. Optionally, for offline rendering in Jupyter Notebook, you can use the notebook renderer:: # Optional in Jupyter Notebook: requires an up-to-date vega nbextension. alt.renderers.enable('notebook') This renderer is provided by the `ipyvega`_ notebook extension, which can be installed and enabled either using pip: .. code-block:: bash $ pip install vega or conda: .. code-block:: bash $ conda install vega --channel conda-forge In older versions of the notebook (<5.3) you need to additionally enable the extension: .. code-block:: bash $ jupyter nbextension install --sys-prefix --py vega .. _display-nteract: Displaying in nteract --------------------- nteract_ cannot display HTML outputs natively, and so Altair's default ``html`` renderer will not work. However, nteract natively includes vega and vega-lite mimetype-based rendering. To use Altair in nteract, ensure you are using a version that supports the Vega-Lite v6 mimetype, and use:: alt.renderers.enable('mimetype') .. 
_display-vscode: Displaying in VSCode -------------------- `VSCode-Python`_ works with Altair's default renderer with a live web connection: no render enable step is required. Optionally, for offline rendering, you can use the mimetype renderer:: # Optional in VS Code alt.renderers.enable('mimetype') .. _display_dashboards: Dashboards ---------- Altair is compatible with common Python dashboarding packages. Many of them even provide support for reading out :ref:`parameters ` from the chart. This allows you to e.g. select data points and update another part of the dashboard such as a table based on that selection: =================================================================================================================================== =================================== ============================= Package Displays interactive Altair charts Supports reading out parameters =================================================================================================================================== =================================== ============================= `Panel `_ ✔ ✔ `Plotly Dash `_ ✔ ✔ `Jupyter Voila `_ using :ref:`JupyterChart ` ✔ ✔ `Marimo `_ ✔ ✔ `Shiny `_ using :ref:`JupyterChart ` ✔ ✔ `Solara `_ ✔ ✔ `Streamlit `_ ✔ ✔ =================================================================================================================================== =================================== ============================= The above mentioned frameworks all require you to run a web application on a server if you want to share your work with others. A web application gives you a lot of flexibility, you can for example fetch data from a database based on the value of a dropdown menu in the dashboard. However, it comes with some complexity as well. For use cases where the interactivity provided by Altair itself is enough, you can also use tools which generate HTML pages which do not require a web server such as `Quarto `_ or `Jupyter Book `_. 
If you are using a dashboarding package that is not listed here, please `open an issue `_ on GitHub so that we can add it. .. _display-general: Working in environments without a JavaScript frontend ----------------------------------------------------- The Vega-Lite specifications produced by Altair can be produced in any Python environment, but to render these specifications currently requires a JavaScript engine. For this reason, Altair works most seamlessly with the browser-based environments mentioned above. Even so, Altair can be used effectively in non-browser based environments using the approaches described below. Static Image Renderers ~~~~~~~~~~~~~~~~~~~~~~ The ``"png"`` and ``"svg"`` renderers rely on the JavaScript engine embedded in the vl-convert optional dependency to generate static images from Vega-Lite chart specifications. These static images are then displayed in IPython-based environments using the Mime Renderer Extensions system. This approach may be used to display static versions of Altair charts inline in the `IPython QtConsole`_ and `Spyder`_, as well as in browser-based environments like JupyterLab. The ``"svg"`` renderer is enabled like this:: alt.renderers.enable("svg") The ``"png"`` renderer is enabled like this:: alt.renderers.enable("png") The ``"png"`` renderer supports the following keyword argument configuration options: - The ``scale_factor`` argument may be used to increase the chart size by the specified scale factor (Default 1.0). - The ``ppi`` argument controls the pixels-per-inch resolution of the displayed image (Default 72). Example usage:: alt.renderers.enable("png", scale_factor=2, ppi=144) .. _display-browser: Browser Renderer ~~~~~~~~~~~~~~~~ To support displaying charts with interactive features in non-browser based environments, the ``"browser"`` renderer automatically opens charts in browser tabs of a system web browser. 
The ``"browser"`` renderer is enabled like this:: alt.renderers.enable("browser") The ``"browser"`` renderer supports the following keyword argument configuration options: - The ``using`` argument may be used to specify which system web browser to use. This may be set to a string to indicate the single browser that must be used (e.g. ``"safari"``), or it may be set to a list of browser names where the first available browser is used. See the documentation for the `webbrowser module`_ for the list of supported browser names. If not specified, the system default browser is used. - The ``offline`` argument may be used to specify whether JavaScript dependencies should be loaded from an online CDN or embedded alongside the chart specification. When ``offline`` is ``False`` (the default), JavaScript dependencies are loaded from an online CDN, and so an internet connection is required. When ``offline`` is ``True``, JavaScript dependencies are embedded alongside the chart specification and so no internet connection is required. Setting ``offline`` to ``True`` requires the optional ``vl-convert-python`` dependency. - The ``port`` argument may be used to configure the system port that the chart HTML is served on. Defaults to a random open port. Limitations: - The ``"browser"`` renderer sets up a temporary web server that serves the chart exactly once, then opens the designated browser pointing to the server's URL. This approach does not require the creation of temporary HTML files on disk, and it's memory efficient as there are no long-lived web server processes required. A limitation of this approach is that the chart will be lost if the browser is refreshed, and it's not possible to copy the chart URL and paste it in another browser tab. - When used in IPython-based environments, the ``"browser"`` renderer will automatically open the chart in the browser when the chart is the final value of the cell or command. 
This behavior is not available in the standard ``python`` REPL. In this case, the ``chart.show()`` method may be used to manually invoke the active renderer and open the chart in the browser. - This renderer is not compatible with remote environments like `Binder`_ or `Colab`_. Manual ``save()`` and display ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you would prefer, you can save your chart to a file (html, png, etc.) first and then display it. See :ref:`user-guide-saving` for more information. .. _display-troubleshooting: Troubleshooting --------------- Altair has a number of moving parts: it creates data structures in Python, those structures are passed to front-end renderers, and the renderers run JavaScript code to generate the output. This complexity means that it's possible to get into strange states where things don't immediately work as expected. This section summarizes some of the most common problems and their solutions. .. _troubleshooting-general: General Troubleshooting ~~~~~~~~~~~~~~~~~~~~~~~ Chart does not display at all ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you are expecting a chart output and see nothing at all, it means that the Javascript rendering libraries are not being invoked. This can happen for several reasons: 1. You have an old browser that doesn't support JavaScript's `ECMAScript 6`_: in this case, charts may not display properly or at all. For example, Altair charts will not render in any version of Internet Explorer. If this is the case, you will likely see syntax errors in your browser's `Javascript Console`_. 2. Your browser is unable to load the javascript libraries. This may be due to a local firewall, an adblocker, or because your browser is offline. Check your browser's `Javascript Console`_ to see if there are errors. 3. You may be failing to trigger the notebook's display mechanism (see below). 
If you are working in a notebook environment, the chart is only displayed if the **last line of the cell evaluates to a chart object**. By analogy, consider the output of simple Python operations:: >>> x = 4 # no output here >>> x # output here, because x is evaluated 4 >>> x * 2 # output here, because the expression is evaluated 8 If the last thing you type consists of an assignment operation, there will be no output displayed. This turns out to be true of Altair charts as well: .. altair-plot:: :output: none import altair as alt from altair.datasets import data cars = data.cars.url chart = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', ) The last statement is an assignment, so there is no output and the chart is not shown. If you have a chart assigned to a variable, you need to end the cell with an evaluation of that variable: .. altair-plot:: chart = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', ) chart Alternatively, you can evaluate a chart directly, and not assign it to a variable, in which case the object definition itself is the final statement and will be displayed as an output: .. altair-plot:: alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', ) Plot displays, but the content is empty ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Sometimes charts may appear, but the content is empty; for example: .. altair-plot:: import altair as alt alt.Chart('nonexistent_file.csv').mark_line().encode( x='x:Q', y='y:Q', ) If this is the case, it generally means one of two things: 1. your data is specified by a URL that is invalid or inaccessible 2. your encodings do not match the columns in your data source In the above example, ``nonexistent_file.csv`` doesn't exist, and so the chart does not render (associated warnings will be visible in the `Javascript Console`_).
Some other specific situations that may cause this: You have an adblocker active Charts that reference data by URL can sometimes trigger false positives in your browser's adblocker. Check your browser's `Javascript Console`_ for errors, and try disabling your adblocker. You are loading data cross-domain If you save a chart to HTML and open it using a ``file://`` url in your browser, most browsers will not allow the javascript to load datasets from an ``http://`` domain. This is a security feature in your browser that cannot be disabled. To view such charts locally, a good approach is to use a simple local HTTP server like the one provided by Python:: $ python -m http.server Your encodings do not match your data A similar blank chart results if you refer to a field that does not exist in the data, either because of a typo in your field name, or because the column contains special characters (see below). Here is an example of a mis-specified field name leading to a blank chart: .. altair-plot:: import pandas as pd data = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 1, 4]}) alt.Chart(data).mark_point().encode( x='x:Q', y='y:Q', color='color:Q' # <-- this field does not exist in the data! ) Altair does not check whether fields are valid, because there are many avenues by which a field can be specified within the full schema, and it is too difficult to account for all corner cases. Improving the user experience in this is a priority; see https://github.com/vega/vega-lite/issues/3576. Encodings with special characters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The Vega-Lite grammar on which Altair is based allows for encoding names to use special characters to access nested properties (See Vega-Lite's Field_ documentation). This can lead to errors in Altair when trying to use such columns in your chart. For example, the following chart is invalid: .. 
altair-plot:: import pandas as pd data = pd.DataFrame({'x.value': [1, 2, 3]}) alt.Chart(data).mark_point().encode( x='x.value:Q', ) To plot this data directly, you must escape the period in the field name: .. altair-plot:: import pandas as pd data = pd.DataFrame({'x.value': [1, 2, 3]}) alt.Chart(data).mark_point().encode( x=r'x\.value:Q', ) In general, it is better to avoid special characters like ``"."``, ``"["``, and ``"]"`` in your data sources where possible. .. _troubleshooting-jupyterlab: Troubleshooting in JupyterLab ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. _jupyterlab-vega-lite-4-object: VegaLite 4/5 Object ^^^^^^^^^^^^^^^^^^^ *If you are using the Jupyter notebook rather than JupyterLab, then refer to* :ref:`notebook-vega-lite-4-object` If you are using JupyterLab (not Jupyter notebook) and see an error message mentioning either ``VegaLite 4 object`` or ``VegaLite 5 object``, then this means that you have enabled the ``mimetype`` renderer, but that your JupyterLab frontend does not support the VegaLite 4 or 5 mimetype. The easiest solution is to use the default renderer:: alt.renderers.enable('default') and rerun the cell with the chart. If you would like to use the mimetype rendering with JupyterLab, update JupyterLab to the newest version with ``pip install -U jupyterlab`` or ``conda update jupyterlab``. .. _jupyterlab-vega-lite-3-object: VegaLite 3 Object ^^^^^^^^^^^^^^^^^ *If you are using the Jupyter notebook rather than JupyterLab, then refer to* :ref:`notebook-vega-lite-3-object` If you are using JupyterLab (not Jupyter notebook) and see the following output:: <VegaLite 3 object> This most likely means that you are using too old a version of JupyterLab. Altair 3.0 or later works best with JupyterLab version 1.0 or later; check the version with:: $ jupyter lab --version 1.2.0 If you have an older jupyterlab version, then use ``pip install -U jupyterlab`` or ``conda update jupyterlab`` to update JupyterLab, depending on how you first installed it.
JavaScript output is disabled in JupyterLab ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you are using JupyterLab and see the following output:: JavaScript output is disabled in JupyterLab it can mean one of two things is wrong: 1. You are using an old version of Altair. JupyterLab only works with Altair version 2.0 or newer; you can check the altair version by executing the following in a notebook code cell:: import altair as alt alt.__version__ If the version is older than 2.0, then exit JupyterLab and follow the installation instructions at :ref:`display-jupyterlab`. 2. You have enabled the wrong renderer. JupyterLab works with the default renderer, but if you have used ``alt.renderers.enable()`` to enable another renderer, charts will no longer render correctly in JupyterLab. You can check which renderer is active by running:: import altair as alt print(alt.renderers.active) JupyterLab rendering will work only if the active renderer is ``"default"`` or ``"jupyterlab"``. You can re-enable the default renderer by running:: import altair as alt alt.renderers.enable('default') (Note that the default renderer is enabled, well, by default, and so this is only necessary if you've somewhere changed the renderer explicitly). .. _jupyterlab-textual-chart-representation: Textual Chart Representation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ *If you are using the Notebook rather than JupyterLab, then refer to* :ref:`notebook-textual-chart-representation` If you are using JupyterLab and see a textual representation of the Chart object similar to this:: Chart({ data: 'https://vega.github.io/vega-datasets/data/cars.json', encoding: FacetedEncoding({ x: X({ shorthand: 'Horsepower' }) }), mark: 'point' }) it probably means that you are using an older Jupyter kernel. You can confirm this by running:: import IPython; IPython.__version__ # 6.2.1 Altair will not display correctly if using a kernel with IPython version 4.X or older.
The easiest way to address this is to change your kernel: choose "Kernel"->"Change Kernel" and then use the first kernel that appears. .. _jupyterlab-notebook-backend: Javascript Error: require is not defined ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you are using JupyterLab and see the error:: Javascript Error: require is not defined This likely means that you have enabled the notebook renderer, which is not supported in JupyterLab: that is, you have somewhere run ``alt.renderers.enable('notebook')``. JupyterLab supports Altair's default renderer, which you can re-enable using:: alt.renderers.enable('default') .. _troubleshooting-notebook: Troubleshooting in Notebooks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. _notebook-vega-lite-4-object: Notebook: VegaLite 4/5 object ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ *If you are using JupyterLab rather than the Jupyter notebook, then refer to* :ref:`jupyterlab-vega-lite-4-object` If you are using Jupyter Notebook (not JupyterLab) and see an error message mentioning either ``VegaLite 4 object`` or ``VegaLite 5 object``, then this means that you have enabled the ``mimetype`` renderer. The easiest solution is to use the default renderer:: alt.renderers.enable('default') and rerun the cell with the chart. .. _notebook-vega-lite-3-object: Notebook: VegaLite 3 object ^^^^^^^^^^^^^^^^^^^^^^^^^^^ *If you are using JupyterLab rather than the Jupyter notebook, then refer to* :ref:`jupyterlab-vega-lite-3-object` If you are using the notebook (not JupyterLab) and see the following output:: <VegaLite 3 object> it means that either: 1. You have forgotten to enable the notebook renderer. As mentioned in :ref:`display-notebook`, you need to install version 2.0 or newer of the ``vega`` package and Jupyter extension, and then enable it using:: import altair as alt alt.renderers.enable('notebook') in order to render charts in the classic notebook.
If the above code gives an error:: NoSuchEntryPoint: No 'notebook' entry point found in group 'altair.vegalite.v2.renderer' This means that you have not installed the vega package. If you see this error, please make sure to follow the standard installation instructions at :ref:`display-notebook`. 2. You have too old a version of Jupyter notebook. Run:: $ jupyter notebook --version and make certain you have version 5.3 or newer. If not, then update the notebook using either ``pip install -U jupyter notebook`` or ``conda update jupyter notebook`` depending on how you first installed the packages. If you have done the above steps and charts still do not render, it likely means that you are using a different *Kernel* within your notebook. Switch to the kernel named *Python 2* if you are using Python 2, or *Python 3* if you are using Python 3. .. _notebook-textual-chart-representation: Notebook: Textual Chart Representation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ *If you are using JupyterLab rather than the Notebook, then refer to* :ref:`jupyterlab-textual-chart-representation` *If you are not using a Jupyter notebook environment, then refer to* :ref:`troubleshooting-non-notebook`. If you are using Jupyter notebook and see a textual representation of the Chart object similar to this:: Chart({ data: 'https://vega.github.io/vega-datasets/data/cars.json', encoding: FacetedEncoding({ x: X({ shorthand: 'Horsepower' }) }), mark: 'point' }) it probably means that you are using an older Jupyter kernel. You can confirm this by running:: import IPython; IPython.__version__ # 6.2.1 Altair will not display correctly if using a kernel with IPython version 4.X or older. The easiest way to address this is to change your kernel: choose "Kernel"->"Change Kernel" and then select "Python 2" or "Python 3", depending on what version of Python you used when installing Altair. ..
_troubleshooting-non-notebook: Troubleshooting outside of Jupyter ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you are using Altair outside of a Jupyter notebook environment (such as a Python or IPython terminal) charts will be displayed as a textual representation. Rendering of Altair charts requires executing Javascript code, which your Python terminal cannot do natively. For recommendations on how to use Altair outside of notebook environments, see :ref:`display-general`. .. _`ECMAScript 6`: https://www.w3schools.com/js/js_es6.asp .. _`Javascript Console`: https://webmasters.stackexchange.com/questions/8525/how-do-i-open-the-javascript-console-in-different-browsers .. _Field: https://vega.github.io/vega-lite/docs/field.html .. _ipyvega: https://github.com/vega/ipyvega/ .. _JupyterLab: http://jupyterlab.readthedocs.io/en/stable/ .. _nteract: https://nteract.io .. _nbconvert: https://nbconvert.readthedocs.io/ .. _nbviewer: https://nbviewer.jupyter.org/ .. _Colab: https://colab.research.google.com .. _Hydrogen: https://github.com/nteract/hydrogen .. _Jupyter Notebook: https://jupyter-notebook.readthedocs.io/en/stable/ .. _Vega-Lite: http://vega.github.io/vega-lite .. _Vega: https://vega.github.io/vega/ .. _VSCode-Python: https://code.visualstudio.com/docs/python/python-tutorial .. _Zeppelin: https://zeppelin.apache.org/ .. _Binder: https://mybinder.org/ .. _IPython: https://ipython.org/ .. _Spyder: https://www.spyder-ide.org/ .. _IPython QtConsole: https://qtconsole.readthedocs.io/en/stable/ .. _webbrowser module: https://docs.python.org/3/library/webbrowser.html#webbrowser.register .. _Olli: https://mitvis.github.io/olli/ ================================================ FILE: doc/user_guide/encodings/channel_options.rst ================================================ .. currentmodule:: altair .. _user-guide-encoding-channel-options: Channel Options --------------- Some encoding channels allow for additional options to be expressed. 
These can control things like axis properties, scale properties, headers and titles, binning parameters, aggregation, sorting, and many more. The section titles below refer to the channels introduced in :ref:`user-guide-encoding-channels` and show the accepted options for these channels. X and Y ~~~~~~~ The :class:`X` and :class:`Y` encodings accept the following options: .. altair-object-table:: altair.PositionFieldDef Color, Fill, and Stroke ~~~~~~~~~~~~~~~~~~~~~~~ The :class:`Color`, :class:`Fill`, and :class:`Stroke` encodings accept the following options: .. altair-object-table:: altair.FieldOrDatumDefWithConditionMarkPropFieldDefGradientstringnull Shape ~~~~~ The :class:`Shape` encoding accepts the following options: .. altair-object-table:: altair.FieldOrDatumDefWithConditionMarkPropFieldDefTypeForShapestringnull Order ~~~~~ The :class:`Order` encoding accepts the following options: .. altair-object-table:: altair.OrderFieldDef Angle, FillOpacity, Opacity, Size, StrokeOpacity, and StrokeWidth ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The :class:`Angle`, :class:`FillOpacity`, :class:`Opacity`, :class:`Size`, :class:`StrokeOpacity`, and :class:`StrokeWidth` encodings accept the following options: .. altair-object-table:: altair.FieldOrDatumDefWithConditionMarkPropFieldDefnumber StrokeDash ~~~~~~~~~~ The :class:`StrokeDash` encoding accepts the following options: .. altair-object-table:: altair.FieldOrDatumDefWithConditionMarkPropFieldDefnumberArray Row and Column ~~~~~~~~~~~~~~ The :class:`Row` and :class:`Column` encodings accept the following options: .. altair-object-table:: altair.RowColumnEncodingFieldDef Facet ~~~~~ The :class:`Facet` encoding accepts the following options: .. altair-object-table:: altair.FacetEncodingFieldDef Text ~~~~ The :class:`Text` encoding accepts the following options: ..
altair-object-table:: altair.FieldOrDatumDefWithConditionStringFieldDefText Href, Tooltip, Url ~~~~~~~~~~~~~~~~~~ The :class:`Href`, :class:`Tooltip`, and :class:`Url` encodings accept the following options: .. altair-object-table:: altair.StringFieldDefWithCondition Detail ~~~~~~ The :class:`Detail` encoding accepts the following options: .. altair-object-table:: altair.FieldDefWithoutScale Latitude and Longitude ~~~~~~~~~~~~~~~~~~~~~~ The :class:`Latitude` and :class:`Longitude` encodings accept the following options: .. altair-object-table:: altair.LatLongFieldDef Radius and Theta ~~~~~~~~~~~~~~~~ The :class:`Radius` and :class:`Theta` encodings accept the following options: .. altair-object-table:: altair.PositionFieldDefBase Latitude2, Longitude2, Radius2, Theta2, X2, Y2, XError, YError, XError2, and YError2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The :class:`Latitude2`, :class:`Longitude2`, :class:`Radius2`, :class:`Theta2`, :class:`X2`, :class:`Y2`, :class:`XError`, :class:`YError`, :class:`XError2`, and :class:`YError2` encodings accept the following options: .. altair-object-table:: altair.SecondaryFieldDef ================================================ FILE: doc/user_guide/encodings/channels.rst ================================================ .. currentmodule:: altair .. _user-guide-encoding-channels: Channels -------- Altair provides a number of encoding channels that can be useful in different circumstances. 
The following sections summarize them: Position ~~~~~~~~ ========== =================== ================================= =================================== Channel Altair Class Description Example ========== =================== ================================= =================================== x :class:`X` The x-axis value :ref:`gallery_scatter_tooltips` y :class:`Y` The y-axis value :ref:`gallery_scatter_tooltips` x2 :class:`X2` Second x value for ranges :ref:`gallery_gantt_chart` y2 :class:`Y2` Second y value for ranges :ref:`gallery_candlestick_chart` longitude :class:`Longitude` Longitude for geo charts :ref:`gallery_point_map` latitude :class:`Latitude` Latitude for geo charts :ref:`gallery_point_map` longitude2 :class:`Longitude2` Second longitude value for ranges :ref:`gallery_airport_connections` latitude2 :class:`Latitude2` Second latitude value for ranges :ref:`gallery_airport_connections` xError :class:`XError` The x-axis error value N/A yError :class:`YError` The y-axis error value N/A xError2 :class:`XError2` The second x-axis error value N/A yError2 :class:`YError2` The second y-axis error value N/A xOffset :class:`XOffset` Offset to the x position :ref:`gallery_grouped_bar_chart2` yOffset :class:`YOffset` Offset to the y position :ref:`gallery_strip_plot_jitter` theta :class:`Theta` The start arc angle :ref:`gallery_radial_chart` theta2 :class:`Theta2` The end arc angle (radian) :ref:`gallery_pacman_chart` ========== =================== ================================= =================================== Mark Property ~~~~~~~~~~~~~ ============= ====================== ============================== ========================================= Channel Altair Class Description Example ============= ====================== ============================== ========================================= angle :class:`Angle` The angle of the mark :ref:`gallery_wind_vector_map` color :class:`Color` The color of the mark :ref:`gallery_simple_heatmap` fill 
:class:`Fill` The fill for the mark :ref:`gallery_ridgeline_plot` fillOpacity :class:`FillOpacity` The opacity of the mark's fill N/A opacity :class:`Opacity` The opacity of the mark :ref:`gallery_horizon_graph` radius :class:`Radius` The radius of the mark :ref:`gallery_radial_chart` shape :class:`Shape` The shape of the mark :ref:`gallery_us_incomebrackets_by_state_facet` size :class:`Size` The size of the mark :ref:`gallery_table_bubble_plot_github` stroke :class:`Stroke` The stroke of the mark N/A strokeDash :class:`StrokeDash` The stroke dash style :ref:`gallery_multi_series_line` strokeOpacity :class:`StrokeOpacity` The opacity of the line N/A strokeWidth :class:`StrokeWidth` The width of the line N/A ============= ====================== ============================== ========================================= Text and Tooltip ^^^^^^^^^^^^^^^^ ======= ================ ======================== ========================================= Channel Altair Class Description Example ======= ================ ======================== ========================================= text :class:`Text` Text to use for the mark :ref:`gallery_scatter_with_labels` tooltip :class:`Tooltip` The tooltip value :ref:`gallery_scatter_tooltips` ======= ================ ======================== ========================================= .. _hyperlink-channel: Hyperlink ~~~~~~~~~ ======= ================ ======================== ========================================= Channel Altair Class Description Example ======= ================ ======================== ========================================= href :class:`Href` Hyperlink for points :ref:`gallery_scatter_href` ======= ================ ======================== ========================================= Detail ~~~~~~ Grouping data is an important operation in data visualization. For line and area marks, mapping an unaggregated data field to any non-position channel will group the lines and stacked areas by that field.
For aggregated plots, all unaggregated fields encoded are used as grouping fields in the aggregation (similar to fields in ``GROUP BY`` in SQL). The ``detail`` channel specifies an additional grouping field (or fields) for grouping data without mapping the field(s) to any visual properties. ======= ================ =============================== ========================================= Channel Altair Class Description Example ======= ================ =============================== ========================================= detail :class:`Detail` Additional property to group by :ref:`gallery_ranged_dot_plot` ======= ================ =============================== ========================================= For example, here is a line chart showing stock prices of 5 tech companies over time. We map the ``symbol`` variable to ``detail`` to use it to group the lines. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line().encode( x="date:T", y="price:Q", detail="symbol:N" ) Order ~~~~~ The ``order`` option and :class:`Order` channel can sort how marks are drawn on the chart. For stacked marks, this controls the order of components of the stack. Here, the elements of each bar are sorted alphabetically by the name of the nominal data in the color channel. .. altair-plot:: import altair as alt from altair.datasets import data barley = data.barley() alt.Chart(barley).mark_bar().encode( x='variety:N', y='sum(yield):Q', color='site:N', order=alt.Order("site").sort("ascending") ) The order can be reversed by changing the sort option to ``descending``. .. altair-plot:: import altair as alt from altair.datasets import data barley = data.barley() alt.Chart(barley).mark_bar().encode( x='variety:N', y='sum(yield):Q', color='site:N', order=alt.Order("site").sort("descending") ) The same approach works for other mark types, like stacked area charts. ..
altair-plot:: import altair as alt from altair.datasets import data barley = data.barley() alt.Chart(barley).mark_area().encode( x='variety:N', y='sum(yield):Q', color='site:N', order=alt.Order("site").sort("ascending") ) Note that unlike the ``sort`` parameter to positional encoding channels, the :class:`Order` channel cannot take a list of values to sort by and is not automatically sorted when an ordered pandas categorical column is passed. If we want to sort stacked segments in a custom order, we can `follow the approach in this issue comment `_, although there might be edge cases where this is not fully supported. This workaround also makes the order of the segments align with the order in which the colors show up in a legend that uses custom sorting for the color domain. For line marks, the :class:`Order` channel encodes the order in which data points are connected. This can be useful for creating a scatter plot that draws lines between the dots using a different field than the x and y axes. .. altair-plot:: import altair as alt from altair.datasets import data driving = data.driving() alt.Chart(driving).mark_line(point=True).encode( alt.X('miles').scale(zero=False), alt.Y('gas').scale(zero=False), order='year' ) Facet ~~~~~ For more information, see :ref:`facet-chart`.
======= ================ =============================================== ============================================= Channel Altair Class Description Example ======= ================ =============================================== ============================================= column :class:`Column` The column of a faceted plot :ref:`gallery_scatter_faceted` row :class:`Row` The row of a faceted plot :ref:`gallery_beckers_barley_facet` facet :class:`Facet` The row and/or column of a general faceted plot :ref:`gallery_us_population_over_time_facet` ======= ================ =============================================== ============================================= ================================================ FILE: doc/user_guide/encodings/index.rst ================================================ .. currentmodule:: altair .. _user-guide-encoding: Encodings --------- The key to creating meaningful visualizations is to map *properties of the data* to *visual properties* in order to effectively communicate information. In Altair, this mapping of visual properties to data columns is referred to as an **encoding**, and is most often expressed through the :meth:`Chart.encode` method. For example, here we will visualize the cars dataset using four of the available **encoding channels** (see :ref:`user-guide-encoding-channels` for details): ``x`` (the x-axis value), ``y`` (the y-axis value), ``color`` (the color of the marker), and ``shape`` (the shape of the point marker): .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars() alt.Chart(cars).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', color='Origin', shape='Origin' ) Channel Options ~~~~~~~~~~~~~~~ Each encoding channel accepts a number of **channel options** (see :ref:`user-guide-encoding-channel-options` for details) which can be used to further configure the chart. 
Altair 5.0 introduced a method-based syntax for setting channel options as a more convenient alternative to the traditional attribute-based syntax described in :ref:`attribute-based-attribute-setting` (but you can still use the attribute-based syntax if you prefer). .. note:: With the release of Altair 5, the documentation was updated to prefer the method-based syntax. The gallery examples still include the attribute-based syntax in addition to the method-based syntax. .. _method-based-attribute-setting: Method-Based Syntax ^^^^^^^^^^^^^^^^^^^ The method-based syntax replaces *keyword arguments* with *methods*. For example, an ``axis`` option of the ``x`` channel encoding would traditionally be set using the ``axis`` keyword argument: ``x=alt.X('Horsepower', axis=alt.Axis(tickMinStep=50))``. To define the same :class:`X` object using the method-based syntax, we can instead use the more succinct ``x=alt.X('Horsepower').axis(tickMinStep=50)``. The same technique works with all encoding channels and all channel options. For example, notice how we make the analogous change with respect to the ``title`` option of the ``y`` channel. The following produces the same chart as the previous example. .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Horsepower').axis(tickMinStep=50), alt.Y('Miles_per_Gallon').title('Miles per Gallon'), color='Origin', shape='Origin' ) These option-setter methods can also be chained together, as in the following, in which we set the ``axis``, ``bin``, and ``scale`` options of the ``x`` channel by using the corresponding methods (``axis``, ``bin``, and ``scale``). We can break the ``x`` definition over multiple lines to improve readability. (This is valid syntax because of the enclosing parentheses from ``encode``.) .. 
altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Horsepower') .axis(ticks=False) .bin(maxbins=10) .scale(domain=(30,300), reverse=True), alt.Y('Miles_per_Gallon').title('Miles per Gallon'), color='Origin', shape='Origin' ) .. _attribute-based-attribute-setting: Attribute-Based Syntax ^^^^^^^^^^^^^^^^^^^^^^ The two examples from the section above would look as follows with the traditional attribute-based syntax: .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Horsepower', axis=alt.Axis(tickMinStep=50)), alt.Y('Miles_per_Gallon', title="Miles per Gallon"), color='Origin', shape='Origin' ) For specs making extensive use of channel options, the attribute-based syntax can become quite verbose: .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X( 'Horsepower', axis=alt.Axis(ticks=False), bin=alt.Bin(maxbins=10), scale=alt.Scale(domain=(30,300), reverse=True) ), alt.Y('Miles_per_Gallon', title='Miles per Gallon'), color='Origin', shape='Origin' ) .. _encoding-data-types: Encoding Data Types ~~~~~~~~~~~~~~~~~~~ The details of any mapping depend on the *type* of the data. Altair recognizes five main data types: ============ ============== ================================================ Data Type Shorthand Code Description ============ ============== ================================================ quantitative ``Q`` a continuous real-valued quantity ordinal ``O`` a discrete ordered quantity nominal ``N`` a discrete unordered category temporal ``T`` a time or date value geojson ``G`` a geographic shape ============ ============== ================================================ For data specified as a DataFrame, Altair can automatically determine the correct data type for each encoding, and creates appropriate scales and legends to represent the data. 
If types are not specified for data input as a DataFrame, Altair defaults to ``quantitative`` for any numeric data, ``temporal`` for date/time data, and ``nominal`` for string data, but be aware that these defaults are by no means always the correct choice! The types can either be expressed in a long-form using the channel encoding classes such as :class:`X` and :class:`Y`, or in short-form using the :ref:`Shorthand Syntax <shorthand-description>` discussed below. For example, the following two methods of specifying the type will lead to identical plots: .. altair-plot:: alt.Chart(cars).mark_point().encode( x='Acceleration:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Acceleration', type='quantitative'), alt.Y('Miles_per_Gallon', type='quantitative'), alt.Color('Origin', type='nominal') ) The shorthand form, ``x="name:Q"``, is useful for its lack of boilerplate when doing quick data explorations. The long-form, ``alt.X('name', type='quantitative')``, is useful when doing more fine-tuned adjustments to the encoding using channel options such as binning, axis, and scale. Specifying the correct type for your data is important, as it affects the way Altair represents your encoding in the resulting plot. .. _type-legend-scale: Effect of Data Type on Color Scales ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ As an example of this, here we will represent the same data three different ways, with the color encoded as a *quantitative*, *ordinal*, and *nominal* type, using three horizontally-concatenated charts (see :ref:`hconcat-chart`): ..
altair-plot:: base = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', ).properties( width=140, height=140 ) alt.hconcat( base.encode(color='Cylinders:Q').properties(title='quantitative'), base.encode(color='Cylinders:O').properties(title='ordinal'), base.encode(color='Cylinders:N').properties(title='nominal'), ) The type specification influences the way Altair, via Vega-Lite, decides on the color scale to represent the value, and influences whether a discrete or continuous legend is used. .. _type-axis-scale: Effect of Data Type on Axis Scales ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Similarly, for x and y axis encodings, the type used for the data will affect the scales used and the characteristics of the mark. For example, here is the difference between an ``ordinal``, ``quantitative``, and ``temporal`` scale for a column that contains integers specifying a year: .. altair-plot:: pop = data.population() base = alt.Chart(pop).mark_bar().encode( alt.Y('mean(people):Q').title('Total population') ).properties( width=140, height=140 ) alt.hconcat( base.encode(x='year:O').properties(title='ordinal'), base.encode(x='year:Q').properties(title='quantitative'), base.encode(x='year:T').properties(title='temporal') ) Because values on quantitative and temporal scales do not have an inherent width, the bars do not fill the entire space between the values. These scales clearly show the missing year of data that was not immediately apparent when we treated the years as ordinal data, but the axis formatting is undesirable in both cases. To plot four digit integers as years with proper axis formatting, i.e. without thousands separator, we recommend converting the integers to strings first, and then specifying a temporal data type in Altair. While it is also possible to change the axis format with ``.axis(format='i')``, it is preferred to specify the appropriate data type to Altair. ..
altair-plot:: pop['year'] = pop['year'].astype(str) base.mark_bar().encode(x='year:T').properties(title='temporal') This kind of behavior is sometimes surprising to new users, but it emphasizes the importance of thinking carefully about your data types when visualizing data: a visual encoding that is suitable for categorical data may not be suitable for quantitative data or temporal data, and vice versa. .. _shorthand-description: Encoding Shorthands ~~~~~~~~~~~~~~~~~~~ For convenience, Altair allows the specification of the variable name along with the aggregate and type within a simple shorthand string syntax. This makes use of the type shorthand codes listed in :ref:`encoding-data-types` as well as the aggregate names listed in :ref:`agg-func-table`. The following table shows examples of the shorthand specification alongside the long-form equivalent: =================== ======================================================= Shorthand Equivalent long-form =================== ======================================================= ``x='name'`` ``alt.X('name')`` ``x='name:Q'`` ``alt.X('name', type='quantitative')`` ``x='sum(name)'`` ``alt.X('name', aggregate='sum')`` ``x='sum(name):Q'`` ``alt.X('name', aggregate='sum', type='quantitative')`` ``x='count():Q'`` ``alt.X(aggregate='count', type='quantitative')`` =================== ======================================================= Escaping special characters in column names ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Seeing that Altair uses ``:`` as a special character to indicate the encoding data type, you might wonder what happens when the column name in your data includes a colon. When this is the case you will need to either rename the column or escape the colon. This is also true for other special characters such as ``.`` and ``[]`` which are used to access nested attributes in some data structures. The recommended thing to do when you have special characters in a column name is to rename your columns. 
For example, in pandas you could replace ``:`` with ``_`` via ``df.rename(columns=lambda x: x.replace(':', '_'))``. If you don't want to rename your columns you will need to escape the special characters using a raw string with a backslash: .. altair-plot:: import pandas as pd source = pd.DataFrame({ 'col:colon': [1, 2, 3], 'col.period': ['A', 'B', 'C'], 'col[brackets]': range(3), }) alt.Chart(source).mark_bar().encode( x=r'col\:colon', # Remove the backslash in the title y=alt.Y(r'col\.period').title('col.period'), # Specify the data type color=r'col\[brackets\]:N', ) As can be seen above, indicating the data type is optional just as for columns without escaped characters. Note that the axes titles include the backslashes by default and you will need to manually set the title strings to remove them. If you are using the long form syntax for encodings, you do not need to escape colons as the type is explicit, e.g. ``alt.X(field='col:colon', type='quantitative')`` (but periods and brackets still need to be escaped in the long form syntax unless they are used to index nested data structures). .. _encoding-aggregates: Binning and Aggregation ~~~~~~~~~~~~~~~~~~~~~~~ Beyond simple channel encodings, Altair's visualizations are built on the concept of the database-style grouping and aggregation; that is, the `split-apply-combine `_ abstraction that underpins many data analysis approaches. For example, building a histogram from a one-dimensional dataset involves splitting data based on the bin it falls in, aggregating the results within each bin using a *count* of the data, and then combining the results into a final figure. In Altair, such an operation looks like this: .. 
altair-plot:: alt.Chart(cars).mark_bar().encode( alt.X('Horsepower').bin(), y='count()' # could also use alt.Y(aggregate='count', type='quantitative') ) Notice here we use the shorthand version of expressing an encoding channel (see :ref:`shorthand-description`) with the ``count`` aggregation, which is the one aggregation that does not require a field to be specified. Similarly, we can create a two-dimensional histogram using, for example, the size of points to indicate counts within the grid (sometimes called a "Bubble Plot"): .. altair-plot:: alt.Chart(cars).mark_point().encode( alt.X('Horsepower').bin(), alt.Y('Miles_per_Gallon').bin(), size='count()', ) There is no need, however, to limit aggregations to counts alone. For example, we could similarly create a plot where the color of each point represents the mean of a third quantity, such as acceleration: .. altair-plot:: alt.Chart(cars).mark_circle().encode( alt.X('Horsepower').bin(), alt.Y('Miles_per_Gallon').bin(), size='count()', color='mean(Acceleration):Q' ) For a full list of available aggregates, see :ref:`agg-func-table`. Sort Option ~~~~~~~~~~~ Some channels accept a :class:`sort` option which determines the order of the scale being used for the channel. By default the scale is sorted in ascending alphabetical order, unless an `ordered pandas categorical column `_ is passed (without an explicit type specification) in which case Altair will use the column's inherent order to sort the scale. There are a number of different options available to change the sort order: - ``sort='ascending'`` (Default) will sort the field's value in ascending order. For string data, this uses standard alphabetical order. - ``sort='descending'`` will sort the field's value in descending order - Passing the name of an encoding channel to ``sort``, such as ``"x"`` or ``"y"``, allows for sorting by that channel. An optional minus prefix can be used for a descending sort. 
For example ``sort='-x'`` would sort by the x channel in descending order. - Passing a `Sequence `_ to ``sort`` allows you to explicitly set the order in which you would like the encoding to appear - Using the ``field`` and ``op`` parameters to specify a field and aggregation operation to sort by. Here is an example of applying these five different sort approaches on the x-axis, using the barley dataset: .. altair-plot:: import altair as alt from altair.datasets import data barley = data.barley() base = alt.Chart(barley).mark_bar().encode( y='mean(yield):Q', color=alt.Color('mean(yield):Q').legend(None) ).properties(width=100, height=100) # Sort x in ascending order ascending = base.encode( alt.X('site:N').sort('ascending') ).properties( title='Ascending' ) # Sort x in descending order descending = base.encode( alt.X('site:N').sort('descending') ).properties( title='Descending' ) # Sort x in an explicitly-specified order explicit = base.encode( alt.X('site:N').sort( ['Duluth', 'Grand Rapids', 'Morris', 'University Farm', 'Waseca', 'Crookston'] ) ).properties( title='Explicit' ) # Sort according to encoding channel sortchannel = base.encode( alt.X('site:N').sort('y') ).properties( title='By Channel' ) # Sort according to another field sortfield = base.encode( alt.X('site:N').sort(field='yield', op='mean') ).properties( title='By Yield' ) alt.concat( ascending, descending, explicit, sortchannel, sortfield, columns=3 ) The last two charts are the same because the default aggregation (see :ref:`agg-func-table`) is ``mean``. To highlight the difference between sorting via channel and sorting via field consider the following example where we don't aggregate the data and use the `op` parameter to specify a different aggregation than `mean` to use when sorting: .. 
altair-plot:: import altair as alt from altair.datasets import data barley = data.barley() base = alt.Chart(barley).mark_point().encode( y='yield:Q', ).properties(width=200) # Sort according to encoding channel sortchannel = base.encode( alt.X('site:N').sort('y') ).properties( title='By Channel' ) # Sort according to another field sortfield = base.encode( alt.X('site:N').sort(field='yield', op='max') ).properties( title='By Max Yield' ) sortchannel | sortfield Sorting Legends ^^^^^^^^^^^^^^^ Just as how the above examples show sorting of axes by specifying ``sort`` in the :class:`X` and :class:`Y` encodings, legends can be sorted by specifying ``sort`` in the encoding used in the legend (e.g. color, shape, size, etc). Below we show an example using the :class:`Color` encoding: .. altair-plot:: alt.Chart(barley).mark_bar().encode( alt.X('mean(yield):Q'), alt.Y('site:N').sort('x'), alt.Color('site:N').sort([ 'Morris', 'Duluth', 'Grand Rapids', 'University Farm', 'Waseca', 'Crookston' ]) ) Here the y-axis is sorted based on the x-values, while the color legend is sorted in the specified order, beginning with ``'Morris'``. In the next example, specifying ``field``, ``op`` and ``order``, sorts the legend based on a chosen data field and operation. .. altair-plot:: alt.Chart(barley).mark_bar().encode( alt.X('mean(yield):Q'), alt.Y('site:N').sort('x'), color=alt.Color('site').sort(field='yield', op='max', order='ascending') ) Datum and Value ~~~~~~~~~~~~~~~ So far we always mapped an encoding channel to a column in our dataset. However, sometimes it is also useful to map to a single constant value.
In Altair, you can do this with * ``datum``, which encodes a constant domain value via a scale using the same units as the underlying data * ``value``, which encodes a constant visual value, using absolute units such as an exact position in pixels, the name or RGB value of a color, the name of a shape, etc ``datum`` is particularly useful for annotating a specific data value. For example, you can use it with a rule mark to highlight a threshold value (e.g., 300 dollars stock price). .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() base = alt.Chart(source) lines = base.mark_line().encode( x="date:T", y="price:Q", color="symbol:N" ) rule = base.mark_rule(strokeDash=[2, 2]).encode( y=alt.datum(300) ) lines + rule If we instead used ``alt.value`` in this example, we would position the rule 300 pixels from the top of the chart border rather than at the 300 dollars position. Since the default chart height is 300 pixels, this will show the dotted line just on top of the x-axis line: .. altair-plot:: rule = base.mark_rule(strokeDash=[2, 2]).encode( y=alt.value(300) ) lines + rule If we want to use ``datum`` to highlight a certain year on the x-axis, we can't simply type in the year as an integer, but instead need to use ``datum`` together with :class:`DateTime`. Here we also set the color for the rule to the same one as the line for the symbol ``MSFT`` with ``alt.datum("MSFT")``. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() base = alt.Chart(source) lines = base.mark_line().encode( x="date:T", y="price:Q", color="symbol:N" ) rule = base.mark_rule(strokeDash=[2, 2]).encode( x=alt.datum(alt.DateTime(year=2006)), color=alt.datum("MSFT") ) lines + rule Similar to when mapping to a data column, when using ``datum`` different encoding channels may support ``band``, ``scale``, ``axis``, ``legend``, ``format``, or ``condition`` properties. However, data transforms (e.g.
``aggregate``, ``bin``, ``timeUnit``, ``sort``) cannot be applied. Expanding on the example above, if you would want to color the ``rule`` mark regardless of the color scale used for the lines, you can use ``value``, e.g. ``alt.value("red")``: .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() base = alt.Chart(source) lines = base.mark_line().encode( x="date:T", y="price:Q", color="symbol:N" ) rule = base.mark_rule(strokeDash=[2, 2]).encode( x=alt.datum(alt.DateTime(year=2006)), color=alt.value("red") ) lines + rule One caution is that ``alt.datum`` and ``alt.value`` do not possess the (newly introduced as of Altair 5.0) method-based syntax to set channel options described in :ref:`method-based-attribute-setting`. For example, if you are using ``alt.datum`` for the ``y`` channel encoding and you wish to use an option setter method (e.g., ``scale``), then you can use :class:`YDatum` instead. Here is a simple example. .. altair-plot:: import altair as alt alt.Chart().mark_bar().encode( y=alt.YDatum(220).scale(domain=(0,500)), color=alt.value("darkkhaki") ) If you were to instead use ``y=alt.datum(220).scale(domain=(0,500))``, an ``AttributeError`` would be raised, due to the fact that ``alt.datum(220)`` simply returns a Python dictionary and does not possess a ``scale`` attribute. If you insisted on producing the preceding example using ``alt.datum``, one option would be to use ``y=alt.datum(220, scale={"domain": (0,500)})``. Nevertheless, the ``alt.YDatum`` approach is strongly preferred to this "by-hand" approach of supplying a dictionary to ``scale``. As one benefit, tab-completions are available using the ``alt.YDatum`` approach. For example, typing ``alt.YDatum(220).scale(do`` and hitting ``tab`` in an environment such as JupyterLab will offer ``domain``, ``domainMax``, ``domainMid``, and ``domainMin`` as possible completions. .. 
toctree:: :hidden: channels channel_options ================================================ FILE: doc/user_guide/interactions/bindings_widgets.rst ================================================ .. currentmodule:: altair .. _binding-parameters: Bindings & Widgets ~~~~~~~~~~~~~~~~~~ With an understanding of the parameter types and conditions, you can now bind parameters to chart elements (e.g. legends) and widgets (e.g. drop-downs and sliders). This is done using the ``bind`` option inside ``param`` and ``selection``. As specified by `the Vega-lite binding docs `_, there are three types of bindings available: 1. Point and interval selections can be used for data-driven interactive elements, such as highlighting and filtering based on values in the data. 2. Sliders and checkboxes can be used for logic-driven interactive elements, such as highlighting and filtering based on the absolute values in these widgets. 3. Interval selections can be bound to a scale, such as zooming in on a map. The following table summarizes the input elements that are supported in Vega-Lite: ========================= =========================================================================== =============================================== Input Element Description Example ========================= =========================================================================== =============================================== :class:`binding_checkbox` Renders as checkboxes allowing for multiple selections of items. :ref:`gallery_multiple_interactions` :class:`binding_radio` Radio buttons that force only a single selection :ref:`gallery_multiple_interactions` :class:`binding_select` Drop down box for selecting a single item from a list :ref:`gallery_multiple_interactions` :class:`binding_range` Shown as a slider to allow for selection along a scale. 
:ref:`gallery_us_population_over_time` :class:`binding` General method that supports many HTML input elements ========================= =========================================================================== =============================================== Widget Binding ^^^^^^^^^^^^^^ Widgets are HTML input elements, such as drop-downs, sliders, radio buttons, and search boxes. There are three strategies for how variable and selection parameters can be used together with widgets: data-driven lookups, data-driven comparisons, and logic-driven comparisons. Data-Driven Lookups ------------------- Data-driven lookups use the active value(s) of the widget together with a ``selection`` parameter to look up points with matching values in the chart's dataset. For example, we can establish a binding between an input widget and a point selection to filter the data as in the example below where a drop-down is used to highlight cars of a specific ``Origin``: .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url input_dropdown = alt.binding_select(options=['Europe', 'Japan', 'USA'], name='Region ') selection = alt.selection_point(fields=['Origin'], bind=input_dropdown) color = ( alt.when(selection) .then(alt.Color("Origin:N").legend(None)) .otherwise(alt.value("lightgray")) ) alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=color, ).add_params( selection ) Note that although it looks like a value is selected in the dropdown from the start, we need to set ``value=`` to actually start out with an initial selection in the chart. We did this previously with variable parameters and selection parameters follow the same pattern as you will see further down in the :ref:`encoding-channel-binding` section. As you can see above, we are still using :ref:`conditions ` to make the chart respond to the selection, just as we did without widgets.
Bindings and input elements can also be used to filter data allowing the user to see just the selected points as in the example below. In this example, we also add an empty selection to illustrate how to revert to showing all points after a selection has been made in a radio button or drop-down (which cannot be deselected). .. altair-plot:: # Make radio button less cramped by adding a space after each label # The spacing will only show up in your IDE, not on this doc page options = ['Europe', 'Japan', 'USA'] labels = [option + ' ' for option in options] input_dropdown = alt.binding_radio( # Add the empty selection which shows all when clicked options=options + [None], labels=labels + ['All'], name='Region: ' ) selection = alt.selection_point( fields=['Origin'], bind=input_dropdown, ) alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', # We need to set a constant domain to preserve the colors # when only one region is shown at a time color=alt.Color('Origin:N').scale(domain=options), ).add_params( selection ).transform_filter( selection ) In addition to the widgets listed in the table above, Altair has access to `any html widget `_ via the more general ``binding`` function. In the example below, we use a search input to filter points that match the search string exactly. You can hover over the points to see the car names and try typing one into the search box, e.g. ``vw pickup`` to see the point highlighted (you need to type out the full name). .. 
altair-plot:: search_input = alt.selection_point( fields=['Name'], empty=False, # Start with no points selected bind=alt.binding( input='search', placeholder="Car model", name='Search ', ) ) alt.Chart(data.cars.url).mark_point(size=60).encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', tooltip='Name:N', opacity=alt.when(search_input).then(alt.value(1)).otherwise(alt.value(0.05)), ).add_params( search_input ) It is not always useful to require an exact match to the search syntax, and when we will be learning about :ref:`expressions`, we will see how we can match partial strings via a regex instead. Data-Driven Comparisons ----------------------- So far we have seen the use of selections to look up points with precisely matching values in our data. This is often useful, but sometimes we might want to make a more complex comparison than an exact match. For example, we might want to create a condition where we select the points in the data that are above or below a threshold value, which is specified via a slider. For this workflow it is recommended to use variable parameters via ``param`` and as you can see below, we use the special syntax ``datum.xval`` to reference the column to compare against. Prefixing the column name with ``datum`` tells Altair that we want to compare to a column in the dataframe, rather than to a Python variable called ``xval``, which would have been the case if we just wrote ``xval < selector``. ..
altair-plot:: import numpy as np import pandas as pd rand = np.random.RandomState(42) df = pd.DataFrame({ 'xval': range(100), 'yval': rand.randn(100).cumsum() }) slider = alt.binding_range(min=0, max=100, step=1, name='Cutoff ') selector = alt.param(name='SelectorName', value=50, bind=slider) predicate = alt.datum.xval < selector alt.Chart(df).mark_point().encode( x='xval', y='yval', color=alt.when(predicate).then(alt.value("red")).otherwise(alt.value("blue")), ).add_params( selector ) In this particular case we could actually have used a selection parameter since selection values can be accessed directly and used in expressions that affect the chart. For example, here we create a slider to choose a cutoff value, and color points based on whether they are smaller or larger than the value: .. altair-plot:: slider = alt.binding_range(min=0, max=100, step=1, name='Cutoff ') selector = alt.selection_point( name="SelectorName", fields=['cutoff'], bind=slider, value=[{'cutoff': 50}] ) predicate = alt.datum.xval < selector.cutoff alt.Chart(df).mark_point().encode( x='xval', y='yval', color=alt.when(predicate).then(alt.value("red")).otherwise(alt.value("blue")), ).add_params( selector ) While it can be useful to know how to access selection values in expression strings, using the parameters syntax introduced in Altair 5 often provides a more convenient syntax for simple interactions like this one since they can also be accessed in expression strings as we saw above. Similarly, it is often possible to use equality statements such as ``alt.datum.xval == selector`` to lookup exact values but it is often more convenient to switch to a selection parameter and specify a field/encoding. Logic-Driven Comparisons ------------------------ A logic comparison is a type of comparison that is based on logical rules and conditions, rather than on the actual data values themselves. 
For example, for a checkbox widget we want to check if the state of the checkbox is True or False and execute some action depending on whether it is checked or not. When we are using a checkbox as a toggle like this, we need to use `param` instead of `selection_point`, since we don't want to check if there are True/False values in our data, just if the value of the check box is True (checked) or False (unchecked): .. altair-plot:: bind_checkbox = alt.binding_checkbox(name='Scale point size by "Acceleration": ') param_checkbox = alt.param(bind=bind_checkbox) alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', size=alt.when(param_checkbox).then("Acceleration:Q").otherwise(alt.value(25)), ).add_params( param_checkbox ) Another example of creating a widget binding that is independent of the data, involves an interesting use case for the more general ``binding`` function. In the next example, this function introduces a color picker where the user can choose the colors of the chart interactively: .. altair-plot:: color_usa = alt.param(value="#317bb4", bind=alt.binding(input='color', name='USA ')) color_europe = alt.param(value="#ffb54d", bind=alt.binding(input='color', name='Europe ')) color_japan = alt.param(value="#adadad", bind=alt.binding(input='color', name='Japan ')) alt.Chart(data.cars.url).mark_circle().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.Color( 'Origin:N', scale=alt.Scale( domain=['USA', 'Europe', 'Japan'], range=[color_usa, color_europe, color_japan] ) ) ).add_params( color_usa, color_europe, color_japan ) .. _legend-binding: Legend Binding ^^^^^^^^^^^^^^ An interactive legend can often be helpful to assist in focusing in on groups of data. Instead of manually having to build a separate chart to use as a legend, Altair provides the ``bind='legend'`` option to facilitate the creation of clickable legends: .. 
altair-plot:: selection = alt.selection_point(fields=['Origin'], bind='legend') alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', opacity=alt.when(selection).then(alt.value(0.8)).otherwise(alt.value(0.2)), ).add_params( selection ) Scale Binding ^^^^^^^^^^^^^ With interval selections, the ``bind`` property can be set to the value of ``"scales"``. In these cases, the binding will automatically respond to the panning and zooming along the chart: .. altair-plot:: selection = alt.selection_interval(bind='scales') alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', ).add_params( selection ) Because this is such a common pattern, Altair provides the :meth:`interactive` method which creates a scale-bound selection more concisely: .. altair-plot:: alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', ).interactive() .. _encoding-channel-binding: Encoding Channel Binding ^^^^^^^^^^^^^^^^^^^^^^^^ To update which columns are displayed in a chart based on the selection in a widget, we would need to bind the widget to an encoding channel. In contrast to legend and scale bindings, it is not possible to set up a binding to an encoding channel in the selection initialization (e.g. by typing ``bind='x'``). Instead, parameters can be used to pass the value of a selection to an encoding channel. This gives more flexibility, but requires the use of a separate calculation transform (as in the example below) until https://github.com/vega/vega-lite/issues/7365 is resolved. In this example, we access the parameter value by referencing the parameter by name. By indexing the data with the parameter value (via ``datum[]``) we can extract the data column that matches the selected value of the parameter, and populate the x-channel with the values from this data column. ..
altair-plot:: dropdown = alt.binding_select( options=['Horsepower', 'Displacement', 'Weight_in_lbs', 'Acceleration'], name='X-axis column ' ) xcol_param = alt.param( value='Horsepower', bind=dropdown ) alt.Chart(data.cars.url).mark_circle().encode( x=alt.X('x:Q').title(''), y='Miles_per_Gallon:Q', color='Origin:N' ).transform_calculate( x=f'datum[{xcol_param.name}]' ).add_params( xcol_param ) Using parameters inside calculate transforms allows us to define dynamic computations (e.g. subtracting different pairs of columns), as you can see in the :ref:`gallery_interactive_column_selection` gallery example. In that example, the chart title is also dynamically updated using a parameter inside an expression which is described in more detail in :ref:`accessing-parameter-values`. Note that it is currently not possible to change the axis titles dynamically based on the selected parameter value, but a text mark could be used instead (as in `this SO answer `_), until https://github.com/vega/vega-lite/issues/7264 is resolved. ================================================ FILE: doc/user_guide/interactions/expressions.rst ================================================ .. currentmodule:: altair .. _expressions: Expressions ~~~~~~~~~~~ Altair allows custom interactions by utilizing the `expression language of Vega `_ for writing basic formulas. A Vega expression string is a well-defined set of JavaScript-style operations. To simplify building these expressions in Python, Altair provides the ``expr`` module, which offers constants and functions to construct expressions using Python syntax. Both JavaScript-syntax and Python-syntax are supported within Altair to define an expression and an introductory example of each is available in the :ref:`user-guide-calculate-transform` transform documentation so we recommend checking out that page before continuing. 
Expressions inside Parameters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In the following example, we define a range connected to a parameter named ``param_width``. We then assign two expressions via ``param`` using both JavaScript and Python-syntax. As previously, we access the parameter values by referencing the parameters by name; in JavaScript that is done via ``f"{param_width.name}"`` whereas in Python it is sufficient to just type the variable name. Using these two expressions defined inside parameters, we can connect them to an encoding channel option, such as the title color of the axis. If the width is below ``200``, then the color is ``red``; otherwise, the color is ``black``. .. altair-plot:: import altair as alt import numpy as np import pandas as pd rand = np.random.RandomState(42) df = pd.DataFrame({ 'xval': range(100), 'yval': rand.randn(100).cumsum() }) bind_range = alt.binding_range(min=100, max=300, name='Slider value: ') param_width = alt.param(bind=bind_range) # Examples of how to write both js and python expressions param_color_js_expr = alt.param(expr=f"{param_width.name} < 200 ? 'red' : 'black'") param_color_py_expr = alt.param(expr=alt.expr.if_(param_width < 200, 'red', 'black')) chart = alt.Chart(df).mark_point().encode( alt.X('xval').axis(titleColor=param_color_js_expr), alt.Y('yval').axis(titleColor=param_color_py_expr) ).add_params( param_width, param_color_js_expr, param_color_py_expr ) chart In the example above, we used a JavaScript-style ternary operator ``f"{param_width.name} < 200 ? 'red' : 'black'"`` which is equivalent to the Python function ``expr.if_(param_width < 200, 'red', 'black')``. The expressions defined as parameters also needed to be added to the chart within ``.add_params()``. Inline Expressions ^^^^^^^^^^^^^^^^^^ In addition to assigning an expression within a parameter definition as shown above, the ``expr()`` utility function allows us to define inline expressions. 
Inline expressions are not parameters, so they can be added directly in the chart spec instead of via ``add_params``, which is a convenient shorthand for writing out the full parameter code. In this example, we modify the chart above to change the size of the points based on an inline expression. Instead of creating a conditional statement, we use the value of the expression as the size directly and therefore only need to specify the name of the parameter. .. altair-plot:: chart.mark_point(size=alt.expr(param_width.name)) In addition to modifying the ``mark_*`` parameters, inline expressions can be passed to encoding channels as a value definition. Here, we make the exact same modification to the chart as in the previous example via this alternate approach: .. altair-plot:: chart.encode(size=alt.value(alt.expr(param_width.name))) `Some parameter names have special meaning in Vega-Lite `_, for example, naming a parameter ``width`` will automatically link it to the width of the chart. .. altair-plot:: bind_range = alt.binding_range(min=100, max=300, name='Chart width: ') param_width = alt.param('width', bind=bind_range) alt.Chart(df).mark_point().encode( alt.X('xval'), alt.Y('yval') ).add_params( param_width ) .. _accessing-parameter-values: Inline Expressions in Titles ---------------------------- An inline expression can be used to update the chart title to show the current value of the parameter. Here, we extend the code from the previous example by using an f-string inside an inline expression. The additional quotations and plus signs are needed for the parameter value to be interpreted correctly. .. altair-plot:: bind_range = alt.binding_range(min=100, max=300, name='Chart width: ') param_width = alt.param('width', bind=bind_range) # In Javascript, a number is converted to a string when added to an existing string, # which is why we use this nested quotation. 
title=alt.Title(alt.expr(f'"This chart is " + {param_width.name} + " px wide"')) alt.Chart(df, title=title).mark_point().encode( alt.X('xval'), alt.Y('yval') ).add_params( param_width ) In the example above, we accessed the value of a variable parameter and inserted it into the chart title. If we instead want our chart title to reflect the value from a selection parameter, it is not enough to reference only the name of the parameter. We also need to reference the field specified by the selection parameter (i.e. ``Origin`` in the example below): .. altair-plot:: from altair.datasets import data cars = data.cars.url input_dropdown = alt.binding_select(options=['Europe', 'Japan', 'USA'], name='Region ') selection = alt.selection_point(fields=['Origin'], bind=input_dropdown, value='Europe') title = alt.Title(alt.expr(f'"Cars from " + {selection.name}.Origin')) alt.Chart(cars, title=title).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', ).add_params( selection ).transform_filter( selection ) A Regex Search Widget --------------------- Now that we know the basics of expressions, let's see how we can improve on our search input example to make the search string match via a regex pattern. To do this we need to use ``expr.regexp`` to define the regex string, and ``expr.test`` to test it against another string (in this case the string in the ``Name`` column). The ``i`` option makes the regex case insensitive, and you can see that we have switched to using ``param`` instead of ``selection_point`` since we are doing something more complex than looking up values with an exact match in the data. To try this out, you can type ``mazda|ford`` in the search input box below. .. 
altair-plot:: search_input = alt.param( value='', bind=alt.binding( input='search', placeholder="Car model", name='Search ', ) ) search_matches = alt.expr.test(alt.expr.regexp(search_input, "i"), alt.datum.Name) alt.Chart(cars).mark_point(size=60).encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', tooltip='Name:N', opacity=alt.when(search_matches).then(alt.value(1)).otherwise(alt.value(0.05)), ).add_params(search_input) And remember, all this interactivity is client side. You can save this chart as an HTML file or put it on a static site generator such as GitHub/GitLab pages and anyone can interact with it without having to install Python. Quite powerful! Summary of Expressions ^^^^^^^^^^^^^^^^^^^^^^ - Altair can utilize the expression language of Vega for writing basic formulas to enable custom interactions. - Both JavaScript-style syntax and Python-style syntax are supported in Altair to define expressions. - Altair provides the ``expr`` module which allows expressions to be constructed with Python syntax. - Expressions can be included within a chart specification using two approaches: through a ``param(expr=...)`` parameter definition or inline using the ``expr(...)`` utility function. - Expressions can be used anywhere the documentation mentions that an `ExprRef` is an accepted value. This is mainly in three locations within a chart specification: mark properties, encoding channel options, and within a value definition for an encoding channel. They are also supported in the chart title, but not yet for subtitles or guide titles (i.e. axis and legends, see https://github.com/vega/vega-lite/issues/7408 for details). ================================================ FILE: doc/user_guide/interactions/index.rst ================================================ .. currentmodule:: altair .. 
_user-guide-interactions: Interactive Charts ================== One of the unique features of Altair, inherited from Vega-Lite, is a declarative grammar of not just visualization, but also *interaction*. This is both convenient and powerful, as we will see in this section. There are three core concepts of this grammar: - Parameters are the basic building blocks in the grammar of interaction. They can either be simple variables or more complex selections that map user input (e.g., mouse clicks and drags) to data queries. - Conditions and filters can respond to changes in parameter values and update chart elements based on that input. - Widgets and other chart input elements can bind to parameters so that charts can be manipulated via drop-down menus, radio buttons, sliders, legends, etc. In addition to these concepts, there are two additional components that enhance the capabilities of interactive visualizations in Altair: - Expressions allow for custom calculation via writing basic formulas. These can be used for fine-controlled interactivity, and are also available outside encodings. - JupyterCharts allow access to Altair's parameters from Python, e.g. printing the values of a selection in the plot. Further reading --------------- Once you have worked through the subpages for the topics listed above, you might wish to look through the :ref:`gallery-category-Interactive Charts` section of the example gallery for ideas about how they can be applied to more interesting charts. If you are planning to use Altair charts together with Dashboard packages, see the section on :ref:`Dashboards `. .. toctree:: :hidden: parameters bindings_widgets expressions jupyter_chart ================================================ FILE: doc/user_guide/interactions/jupyter_chart.rst ================================================ .. currentmodule:: altair .. 
_user-guide-jupyterchart: JupyterChart ============ The ``JupyterChart`` class, introduced in Vega-Altair 5.1, makes it possible to update charts after they have been displayed and access the state of :ref:`user-guide-interactions` from Python. Supported Environments ---------------------- ``JupyterChart`` is a `Jupyter Widget `_ built on the `AnyWidget `_ library. As such, it's compatible with development environments and dashboard toolkits that support third party Jupyter Widgets. Tested environments include: * Classic Jupyter Notebook * JupyterLab * Visual Studio Code * Google Colab * Voila .. note:: If you try ``JupyterChart`` in another environment that supports Jupyter Widgets, `let us know how it goes `_ so that we can keep this list up to date. Basic Usage ----------- To create a ``JupyterChart``, pass a regular ``Chart`` instance to the ``alt.JupyterChart`` constructor. The chart will be displayed automatically if the last expression in a notebook cell evaluates to a ``JupyterChart`` instance. For example: .. code-block:: python import altair as alt import pandas as pd source = pd.DataFrame({ 'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'], 'b': [28, 55, 43, 91, 81, 53, 19, 87, 52] }) chart = alt.Chart(source).mark_bar().encode( x='a', y='b' ) jchart = alt.JupyterChart(chart) jchart .. image:: /_static/jupyter_chart/simple_bar.svg :alt: Bar chart with letters A through I on the x-axis Updating Charts --------------- The ``JupyterChart``'s ``chart`` property can be assigned to a new chart instance, and the new chart will immediately be displayed in place of the old one. .. code-block:: python jchart.chart = chart.mark_bar(color="crimson", cornerRadius=10) .. raw:: html Params: Variables and Selections -------------------------------- As described in :ref:`user-guide-interactions`, Vega-Altair's rich grammar of interactivity is built on the concept of parameters. 
In particular, there are variable parameters (which store a simple value) and selection parameters (which map user interactions to data queries). The ``JupyterChart`` class makes both variable and selection parameters available for use in Python. Variable Params --------------- JupyterChart makes it possible to access, observe, set, and link variable parameters. Accessing Variable Params ~~~~~~~~~~~~~~~~~~~~~~~~~ A chart's variable parameters are stored in the ``params`` property of the ``JupyterChart`` instance. The values of individual named variable parameters may be accessed using regular attribute access. Here is an example that uses :ref:`binding-parameters` to bind a variable parameter named ``cutoff`` to a slider. The current value of the ``cutoff`` variable is available as ``jchart.params.cutoff``. .. code-block:: python import altair as alt import pandas as pd import numpy as np rand = np.random.RandomState(42) df = pd.DataFrame({ 'xval': range(100), 'yval': rand.randn(100).cumsum() }) slider = alt.binding_range(min=0, max=100, step=1) cutoff = alt.param(name="cutoff", bind=slider, value=50) predicate = alt.datum.xval < cutoff chart = alt.Chart(df).mark_point().encode( x='xval', y='yval', color=alt.when(predicate).then(alt.value("red")).otherwise(alt.value("blue")), ).add_params( cutoff ) jchart = alt.JupyterChart(chart) jchart .. raw:: html Observing Variable Params ~~~~~~~~~~~~~~~~~~~~~~~~~ The `observe `_ method on the ``params`` property may be used to register a callback that will be invoked when a parameter changes. In this example, a simple callback function is registered to print the value of the ``cutoff`` parameter. .. code-block:: python def on_cutoff_change(change): print(change.new) jchart.params.observe(on_cutoff_change, ["cutoff"]) .. raw:: html Setting Variable Params ~~~~~~~~~~~~~~~~~~~~~~~ The value of variable parameters may be updated from Python by assigning to the corresponding ``params`` attribute. 
Here's an example of updating the ``cutoff`` variable parameter by assigning to ``jchart.params.cutoff``. .. raw:: html Linking Variable Params ~~~~~~~~~~~~~~~~~~~~~~~ Because ``params`` is a traitlet object, it's possible to use the ipywidgets `link function `_ to bind params to other ipywidgets. Here is an example of linking the ``cutoff`` variable parameter to the value of an ipywidgets ``IntSlider``. .. code-block:: python from ipywidgets import IntSlider, link slider = IntSlider(23, min=0, max=100) link((slider, "value"), (jchart.params, "cutoff")) slider .. raw:: html If an ipywidget is linked to a Vega-Altair variable param, it's not necessary to also bind the param to a Vega-Altair widget. Here, the example above is updated to control the ``cutoff`` variable's value only from the ``IntSlider`` ipywidget. .. code-block:: python import pandas as pd import numpy as np rand = np.random.RandomState(42) df = pd.DataFrame({ 'xval': range(100), 'yval': rand.randn(100).cumsum() }) cutoff = alt.param(name="cutoff", value=50) predicate = alt.datum.xval < cutoff chart = alt.Chart(df).mark_point().encode( x='xval', y='yval', color=alt.when(predicate).then(alt.value("red")).otherwise(alt.value("blue")) ).add_params( cutoff ) jchart = alt.JupyterChart(chart) jchart .. raw:: html Selection Params ---------------- JupyterChart makes it possible to access and observe selection parameters. For the purpose of accessing selections from Python, selection parameters are divided into three types: Point selections, index selections, and interval selections. These selection types are represented by Python classes named ``PointSelection``, ``IndexSelection``, and ``IntervalSelection`` respectively. Instances of these selection classes are available as properties of the JupyterChart's ``selections`` property. 
Point Selections ~~~~~~~~~~~~~~~~ The ``PointSelection`` class is used to store the current state of a Vega-Altair point selection (as created by ``alt.selection_point()``) when either a ``fields`` or ``encodings`` specification is provided. One common example is a point selection with ``encodings=["color"]`` that is bound to the legend. .. code-block:: python import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_point(name="point", encodings=["color"], bind="legend") chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Origin:N").otherwise(alt.value("grey")), ).add_params(brush) jchart = alt.JupyterChart(chart) jchart .. raw:: html The ``PointSelection`` instance may be accessed as ``jchart.selections.point`` (Where "point" is the value of the ``name`` argument to ``alt.selection_point``). The ``jchart.selections.point.value`` property contains a list of dictionaries where each element represents a single point in the selection. This list of dictionaries may be converted into a pandas `query `_ string as follows .. code-block:: python filter = " or ".join([ " and ".join([ f"`{col}` == {repr(val)}" for col, val in sel.items() ]) for sel in jchart.selections.point.value ]) source.query(filter) For example, when the Japan and Europe legend entries are selected, the ``filter`` string above will evaluate to ``"`Origin` == 'Japan' or `Origin` == 'Europe'"``, and the ``source.query(filter)`` expression will evaluate to a pandas ``DataFrame`` containing the rows of ``source`` that are in the selection. Index Selections ~~~~~~~~~~~~~~~~ The ``IndexSelection`` class is used to store the current state of a Vega-Altair point selection (as created by ``alt.selection_point()``) when neither a ``fields`` nor ``encodings`` specification is provided. In this case, the ``value`` property of the selection is a list of the indices of the selected rows. 
These indices can be used with the pandas DataFrame's ``iloc`` attribute to extract the selected rows in the input DataFrame. .. code-block:: python import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_point(name="point") chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Origin:N").otherwise(alt.value("grey")), ).add_params(brush) jchart = alt.JupyterChart(chart) jchart .. raw:: html .. warning:: The indices returned will only correspond to the input DataFrame for charts that do not include aggregations. If a chart includes aggregations, then the ``alt.selection_point`` specification should include either a ``fields`` or ``encodings`` argument, which will result in the ``JupyterChart`` containing a ``PointSelection`` rather than an ``IndexSelection``. Interval Selections ~~~~~~~~~~~~~~~~~~~ The ``IntervalSelection`` class is used to store the current state of a Vega-Altair interval selection (as created by ``alt.selection_interval()``). In this case, the ``value`` property of the selection is a dictionary from column names to selection intervals .. code-block:: python import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_interval(name="interval") chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Cylinders:O").otherwise(alt.value("grey")), ).add_params(brush) jchart = alt.JupyterChart(chart) jchart .. raw:: html The selection dictionary may be converted into a pandas `query `_ string as follows .. 
code-block:: python filter = " and ".join([ f"{v[0]} <= `{k}` <= {v[1]}" for k, v in jchart.selections.interval.value.items() ]) source.query(filter) For example, when the x-selection is from 120 to 160 and the y-selection is from 25 to 35, ``jchart.selections.interval.value`` will be ``{'Horsepower': [120, 160], 'Miles_per_Gallon': [25, 35]}``, the ``filter`` string will be ``"120 <= `Horsepower` <= 160 and 25 <= `Miles_per_Gallon` <= 35"``, and the ``source.query(filter)`` expression will evaluate to a pandas ``DataFrame`` that contains the rows of ``source`` that are in the selection. Observing Selections ~~~~~~~~~~~~~~~~~~~~ As with variable parameters, it's possible to register a callback function to be invoked when a selection changes by using the ``observe`` method on the ``selections`` property. Here is an example that listens for changes to an interval selection, then uses the selection value to filter the input DataFrame and display its HTML representation. An ipywidgets ``VBox`` is used to combine the chart and HTML table in a column layout. .. 
code-block:: python import ipywidgets from IPython.display import display from ipywidgets import HTML, VBox import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_interval(name="brush") chart_widget = alt.JupyterChart(alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Cylinders:O").otherwise(alt.value("grey")), ).add_params(brush)) table_widget = HTML(value=source.iloc[:0].to_html()) def on_select(change): sel = change.new.value if sel is None or 'Horsepower' not in sel: filtered = source.iloc[:0] else: filter_query = ( f"{sel['Horsepower'][0]} <= `Horsepower` <= {sel['Horsepower'][1]} and " f"{sel['Miles_per_Gallon'][0]} <= `Miles_per_Gallon` <= {sel['Miles_per_Gallon'][1]}" ) filtered = source.query(filter_query) table_widget.value = filtered.to_html() chart_widget.selections.observe(on_select, ["brush"]) VBox([chart_widget, table_widget]) .. raw:: html .. _user-guide-jupyterchart-offline: Offline Usage ------------- By default, the ``JupyterChart`` widget loads its JavaScript dependencies dynamically from a CDN location, which requires an active internet connection. Starting in Altair 5.3, JupyterChart supports loading its JavaScript dependencies from the ``vl-convert-python`` package, which enables offline usage. Offline mode is enabled using the ``JupyterChart.enable_offline`` class method. .. code-block:: python import altair as alt alt.JupyterChart.enable_offline() This only needs to be called once, after which all displayed JupyterCharts will operate in offline mode. Offline mode can be disabled by passing ``offline=False`` to this same method. .. code-block:: python import altair as alt alt.JupyterChart.enable_offline(offline=False) Limitations ----------- Setting Selections ~~~~~~~~~~~~~~~~~~ It's not currently possible to set selection states from Python. 
================================================ FILE: doc/user_guide/interactions/parameters.rst ================================================ .. currentmodule:: altair .. _parameters: Parameters, Conditions, & Filters ================================= Parameters ~~~~~~~~~~ Parameters are the building blocks of interaction in Altair. There are two types of parameters: *variables* and *selections*. We introduce these concepts through a series of examples. .. note:: This material was changed considerably with the release of Altair 5. .. _basic variable: Variables: Reusing Values ^^^^^^^^^^^^^^^^^^^^^^^^^ Variable parameters allow for a value to be defined once and then reused throughout the rest of the chart. Here is a simple scatter-plot created from the ``cars`` dataset: .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url alt.Chart(cars).mark_circle().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) Variable parameters are created using the :func:`param` function. Here, we create a parameter with a default value of 0.1 using the ``value`` property: .. altair-plot:: :output: none op_var = alt.param(value=0.1) In order to use this variable in the chart specification, we explicitly add it to the chart using the :meth:`add_params` method, and we can then reference the variable within the chart specification. Here we set the opacity using our ``op_var`` parameter. The :meth:`add_params` method adds the parameter to the chart, making it available for use in encodings, conditions, and filters. .. altair-plot:: op_var = alt.param(value=0.1) alt.Chart(cars).mark_circle(opacity=op_var).encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ).add_params( op_var ) It's reasonable to ask whether all this effort is necessary. Here is a more natural way to accomplish the same thing that avoids the use of both :func:`param` and ``add_params``. .. 
altair-plot:: op_var2 = 0.1 alt.Chart(cars).mark_circle(opacity=op_var2).encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) The benefit of using :func:`param` doesn't become apparent until we incorporate an additional component. In the following example we use the ``bind`` property of the parameter, so that the parameter becomes bound to an input element. In this example, that input element is a slider widget. .. altair-plot:: slider = alt.binding_range(min=0, max=1, step=0.05, name='opacity:') op_var = alt.param(value=0.1, bind=slider) alt.Chart(cars).mark_circle(opacity=op_var).encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ).add_params( op_var ) Now we can dynamically change the opacity of the points in our chart using the slider. You will learn much more about binding parameters to input elements such as widgets in the section :ref:`binding-parameters`. .. note:: A noteworthy aspect of Altair's interactivity is that these effects are controlled entirely within the web browser. This means that you can save charts as HTML files and share them with your colleagues who can access the interactivity via their browser without the need to install Python. Selections: Capturing Chart Interactions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Selection parameters define data queries that are driven by interactive manipulation of the chart by the user (e.g., via mouse clicks or drags). There are two types of selections: :func:`selection_interval` and :func:`selection_point`. Here we will create a simple chart and then add a selection interval to it. We could create a selection interval via ``param(select="interval")``, but it is more convenient to use the shorter ``selection_interval``. Here is a simple scatter-plot created from the ``cars`` dataset: .. 
altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) First we'll create an interval selection using the :func:`selection_interval` function (an interval selection is also referred to as a "brush"): .. altair-plot:: :output: none brush = alt.selection_interval() We can now add this selection interval to our chart via ``add_params``: .. altair-plot:: alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ).add_params( brush ) The result above is a chart that allows you to click and drag to create a selection region, and to move this region once the region is created. So far this example is very similar to what we did in the :ref:`variable example <basic variable>`: we created a selection parameter using ``brush = alt.selection_interval()``, and we attached that parameter to the chart using ``add_params``. One difference is that here we have not defined how the chart should respond to the selection; you will learn this in the next section. .. _conditions: Conditions ~~~~~~~~~~ .. note:: This material was changed considerably with the release of Altair ``5.5.0``. :func:`when` was introduced in ``5.4.0`` and should be preferred over :func:`condition`. The example above is neat, but the selection interval doesn't actually *do* anything yet. To make the chart respond to this selection, we need to reference ``brush`` within the chart specification. Here, we will use the :func:`when` function to create a conditional color encoding: .. altair-plot:: conditional = alt.when(brush).then("Origin:N").otherwise(alt.value("lightgray")) alt.Chart(cars).mark_point().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", color=conditional, ).add_params( brush ) As you can see, the color of the points now changes depending on whether they are inside or outside the selection. 
Above we are using the selection parameter ``brush`` as a *predicate* (something that evaluates to ``True`` or ``False``). This is controlled by our definition ``conditional``:: conditional = alt.when(brush).then("Origin:N").otherwise(alt.value("lightgray")) Data points which fall within the selection evaluate to ``True``, and data points which fall outside the selection evaluate to ``False``. The ``"Origin:N"`` specifies how to color the points which fall within the selection, and the ``alt.value('lightgray')`` specifies that the outside points should be given a constant color value. Understanding :func:`when` ^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``when-then-otherwise`` syntax was directly inspired by `polars.when`_, and is similar to an ``if-else`` statement written in Python:: # alt.when(brush) if brush: # .then("Origin:N") color = "Origin:N" else: # .otherwise(alt.value("lightgray")) color = alt.value("lightgray") Omitting the ``.otherwise()`` clause will use the channel default instead: .. altair-plot:: source = data.cars() brush = alt.selection_interval() points = alt.Chart(source).mark_point().encode( x="Horsepower", y="Miles_per_Gallon", color=alt.when(brush).then(alt.value("goldenrod")) ).add_params( brush ) points Multiple conditional branches (``if, elif, ..., elif`` in Python) are expressed via chained calls to :func:`when`. You will see an example with working code in :ref:`conditional-branches` when you have learned about different selection types. More advanced use of conditions can be found in the :func:`when` API reference and in these gallery examples: - :ref:`gallery_dot_dash_plot` - :ref:`gallery_interactive_bar_select_highlight` - :ref:`gallery_multiline_tooltip_standard` - :ref:`gallery_scatter_point_paths_hover` - :ref:`gallery_waterfall_chart` Linking Conditions Across Charts ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Conditional encodings become even more powerful when the selection behavior is tied across multiple views of the data within a compound chart. 
For example, here we create a :class:`Chart` using the same code as above, and horizontally concatenate two versions of this chart: one with the x-encoding tied to ``"Horsepower"``, and one with the x-encoding tied to ``"Acceleration"`` .. altair-plot:: chart = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Origin:N").otherwise(alt.value("lightgray")), ).properties( width=250, height=250 ).add_params( brush ) chart | chart.encode(x='Acceleration:Q') Because both copies of the chart reference the same selection object, the renderer ties the selections together across panels, leading to a dynamic display that helps you gain insight into the relationships within the dataset. Each selection type has attributes through which its behavior can be customized; for example we might wish for our brush to be tied only to the ``"x"`` encoding to emphasize that feature in the data. We can modify the brush definition, and leave the rest of the code unchanged: .. altair-plot:: brush = alt.selection_interval(encodings=['x']) chart = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Origin:N").otherwise(alt.value("lightgray")), ).properties( width=250, height=250 ).add_params( brush ) chart | chart.encode(x='Acceleration:Q') As you might have noticed, the selected points are sometimes obscured by some of the unselected points. To bring the selected points to the foreground, we can change the order in which they are laid out via the following conditional ``order`` encoding: .. altair-plot:: # Lower numbers for `order` means further into the background selected_on_top = alt.when(brush).then(alt.value(1)).otherwise(alt.value(0)) chart.encode(order=selected_on_top) | chart.encode(order=selected_on_top, x='Acceleration:Q') You can see another example of this in the :ref:`gallery_selection_zorder` gallery example. 
Filters ~~~~~~~ Using a selection parameter to filter data works in much the same way as using it within :func:`when`. For example, in ``transform_filter(brush)``, we are again using the selection parameter ``brush`` as a predicate. Data points which evaluate to ``True`` (i.e., data points which lie within the selection) are kept, and data points which evaluate to ``False`` are filtered out. It is not possible to both select and filter in the same chart, so typically this functionality will be used when at least two sub-charts are present. In the following example, we attach the selection parameter to the upper chart, and then filter data in the lower chart based on the selection in the upper chart. You can explore how the counts change in the bar chart depending on the size and position of the selection in the scatter plot. .. altair-plot:: brush = alt.selection_interval() points = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ).add_params( brush ) bars = alt.Chart(cars).mark_bar().encode( x='count()', y='Origin:N', color='Origin:N' ).transform_filter( brush ) points & bars Selection Types ~~~~~~~~~~~~~~~ Now that we have seen the basics of how we can use a selection to interact with a chart, let's take a more systematic look at the types of selection parameters available in Altair. There are two main types of selections you'll work with through mouse actions: 1. **Interval selections**: Select ranges of data by clicking and dragging 2. **Point selections**: Select individual data points by clicking or hovering To demonstrate these selection types clearly, we'll use a consistent visualization approach. The following helper function creates a heatmap from the ``cars`` dataset where the color encoding responds to our selection: .. 
altair-plot:: :output: none def make_example(selector: alt.Parameter) -> alt.Chart: cars = data.cars.url return alt.Chart(cars).mark_rect().encode( x="Cylinders:O", y="Origin:N", color=alt.when(selector).then("count()").otherwise(alt.value("lightgray")), ).properties( width=300, height=180 ).add_params( selector ) Interval Selections ^^^^^^^^^^^^^^^^^^^ An *interval* selection allows you to select a range of data elements by clicking and dragging. This is often referred to as a "brush" selection and is commonly used for filtering, zooming, or highlighting ranges of data points. Basic Interval Selection """""""""""""""""""""""" The simplest interval selection is created with the :func:`selection_interval` function: .. altair-plot:: interval = alt.selection_interval() make_example(interval) As you click and drag on the plot, your mouse creates a selection region that can be moved to change which data points are selected. Customizing Interval Behavior """"""""""""""""""""""""""""" The :func:`selection_interval` function accepts several arguments for customization. For example, you can limit the selection to only the x-axis and set it so that an empty selection contains no points: .. altair-plot:: interval_x = alt.selection_interval(encodings=['x'], empty=False) make_example(interval_x) The ``empty=False`` argument can also be set inside :func:`when` to control how individual conditions respond to empty selections. By default, `empty=True`, which means the selection predicate evaluates to true for *all* data points when the selection is empty. With `empty=False`, the predicate evaluates to false for all points when the selection is empty. 
This affects what happens when no data points are selected yet:: brush = alt.selection_interval() # Default behavior (empty=True): When no selection is made in this example, all points will be colored according to their "Origin" category (they are considered as *selected*, and colored according to the `then` clause) color=alt.when(brush).then("Origin:N").otherwise(alt.value("lightgray")) # Custom behavior (empty=False): When no selection is made in this example, all points will be lightgray (they are considered as *unselected*, and colored according to the `otherwise` clause ) color=alt.when(brush, empty=False).then("Origin:N").otherwise(alt.value("lightgray")) Setting Initial Values """""""""""""""""""""" When creating interactive visualizations, you often want to guide users by starting with a pre-selected region rather than an empty selection. You can do this using the ``value`` parameter of :func:`selection_interval`. Setting initial values is particularly useful when: - Creating a filtered overview-detail pattern where a detailed subset is shown - Guiding viewer attention to a particular area of interest immediately - Preserving selection states across different visualizations or user sessions Here's an example using time series data where we initially focus on a specific date range containing the 2008 financial crisis: .. 
altair-plot:: import altair as alt from altair.datasets import data import datetime as dt source = data.sp500.url # Define initial date range to select date_range = (dt.date(2007, 6, 30), dt.date(2009, 6, 30)) # Create interval selection with initial value brush = alt.selection_interval( encodings=['x'], value={'x': date_range} ) # Create base chart for both panels base = alt.Chart(source, width=600, height=200).mark_area().encode( x = 'date:T', y = 'price:Q' ) # Upper panel shows detailed view filtered by the brush upper = base.encode( alt.X('date:T').scale(domain=brush) ) # Lower panel shows overview with the brush control lower = base.properties( height=60 ).add_params(brush) # Combine the two charts upper & lower This example demonstrates an "overview+detail" pattern - a common visualization technique where one chart shows the complete dataset while another shows a detailed view of the selected portion. When working with datetime values, you can use Python's native ``datetime.date`` or ``datetime.datetime`` objects directly. Altair automatically handles the conversion of these objects to the appropriate format for Vega-Lite. The format of the ``value`` parameter depends on the encodings used in the selection: - For selections with `x` encoding: ``value={'x': [min, max]}`` - For selections with `y` encoding: ``value={'y': [min, max]}`` - For selections with both: ``value={'x': [xmin, xmax], 'y': [ymin, ymax]}`` You can also set initial values of a selection with categorical data. In this case the list/tuple specifies the exact categories to be selected rather than a range of categories. For example, to initially select only cars with 4 or 6 cylinders: .. altair-plot:: cylinder_select = alt.selection_interval( encodings=['x'], value={"x": [4, 6]} # Select 4 and 6 (but not 5) cylinder cars ) make_example(cylinder_select) Point Selections ^^^^^^^^^^^^^^^^ A *point* selection allows you to select individual data elements one at a time.
Unlike interval selections which select ranges, point selections work with discrete data points, making them ideal for interactive legends, tooltips, and highlighting specific data points. Basic Point Selection """"""""""""""""""""" The simplest point selection is created with the :func:`selection_point` function. By default, points are selected on click: .. altair-plot:: point = alt.selection_point() make_example(point) By changing the ``on`` parameter to `'pointerover'`, we can select points when hovering over them rather than on click. The ``on`` parameter accepts `any "Event Stream Selector" supported by Vega `_ and you can see a more elaborate example at the end of the :ref:`parameter-composition` section. We can also set the ``nearest`` flag to ``True`` so that the point nearest to the pointer is highlighted, rather than only a point that the pointer is directly touching: .. altair-plot:: point_nearest = alt.selection_point(on='pointerover', nearest=True) make_example(point_nearest) Point selections also support multi-selection. By default, you can add or remove data points from your selection by holding the *shift* key while clicking. Try this behavior in the examples above to see how it works. Setting Initial Values """""""""""""""""""""" You can also set initial values for point selections using the ``value`` parameter. For point selections, the value is a list of dictionaries, each specifying an initially selected point in terms of fields or encodings. .. altair-plot:: point_initial = alt.selection_point( fields=['Origin'], value=[{'Origin': 'USA'}] # Initially select USA ) make_example(point_initial) Selection Targets ~~~~~~~~~~~~~~~~~ For any but the simplest selections, the user needs to think about exactly what is targeted by the selection, and this can be controlled with either the ``fields`` or ``encodings`` arguments. These control what data properties are used to determine which points are part of the selection.
For example, here we create a small chart that acts as an interactive legend, by targeting the Origin field using ``fields=['Origin']``. Clicking on points in the upper-right plot (the legend) will propagate a selection for all points with a matching ``Origin``. .. altair-plot:: selection = alt.selection_point(fields=['Origin']) color = ( alt.when(selection) .then(alt.Color("Origin:N").legend(None)) .otherwise(alt.value("lightgray")) ) scatter = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=color, tooltip='Name:N' ) legend = alt.Chart(cars).mark_point().encode( alt.Y('Origin:N').axis(orient='right'), color=color ).add_params( selection ) scatter | legend Alternatively, we could express ``fields=['Origin']`` as ``encodings=['color']``, because our chart maps ``color`` to ``'Origin'``. Also note that there is a shortcut to create interactive legends in Altair described in the section :ref:`legend-binding`. Similarly, we can specify multiple fields and/or encodings that must be matched in order for a datum to be included in a selection. For example, we could modify the above chart to create a two-dimensional clickable legend that will select points by both Origin and number of cylinders: .. altair-plot:: selection = alt.selection_point(fields=['Origin', 'Cylinders']) color = ( alt.when(selection) .then(alt.Color("Origin:N").legend(None)) .otherwise(alt.value("lightgray")) ) scatter = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=color, tooltip='Name:N' ) legend = alt.Chart(cars).mark_rect().encode( alt.Y('Origin:N').axis(orient='right'), x='Cylinders:O', color=color ).add_params( selection ) scatter | legend By fine-tuning the behavior of selections in this way, they can be used to create a wide variety of linked interactive chart types. 
Combining Parameters ~~~~~~~~~~~~~~~~~~~~ Multiple parameters can be combined in a single chart, either via multiple separate response conditions, different conditional branches in :func:`when`, or parameter composition. Multiple conditions ^^^^^^^^^^^^^^^^^^^ In this example, points that are hovered with the pointer will increase in size and those that are clicked will be filled in with red. The ``empty=False`` is to ensure that no points are selected to start. Try holding shift to select multiple points on either hover or click. .. altair-plot:: click = alt.selection_point(empty=False) hover = alt.selection_point(on='pointerover', empty=False) points = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', fill=alt.when(click).then(alt.value('red')), size=alt.when(hover).then(alt.value(1000)) ).add_params( click, hover ) points .. _conditional-branches: Conditional branches ^^^^^^^^^^^^^^^^^^^^ :func:`when` allows the use of multiple ``then`` (``elif``) branches which can change the behavior of a single encoding in response to multiple different parameters. Here, we fill hovered points in yellow, before changing the fill to red when a point is clicked. Since the mouse is hovering over points while clicking them, both conditions will be active and the earlier branch takes precedence (you can try by changing the order of the two ``when.then`` clauses and observing that the points will not change to red when clicked). .. altair-plot:: click = alt.selection_point(empty=False) hover = alt.selection_point(on='pointerover', empty=False) points = alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', fill=( alt.when(click) .then(alt.value('red')) .when(hover) .then(alt.value('gold')) ), size=alt.when(hover).then(alt.value(1000)) ).add_params( click, hover ) points .. 
_parameter-composition: Parameter Composition ^^^^^^^^^^^^^^^^^^^^^ Altair also supports combining multiple parameters using the ``&``, ``|`` and ``~`` operators for ``AND``, ``OR`` and ``NOT`` logical composition, respectively. These parameter compositions can be used with both filters and conditions in Altair. In the following example, only the points that fall within the interval selections of both the scatter plots will be counted in the bar chart (so you will need to make a selection in both charts before the bars show up). .. altair-plot:: # empty=False ensures that no points are selected before a selection is drawn brush = alt.selection_interval(empty=False) brush2 = alt.selection_interval(empty=False) points = alt.Chart(cars).mark_point(size=10).encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) points2 = points.encode( x='Acceleration:Q', y='Miles_per_Gallon:Q', ) bars = alt.Chart(cars).mark_bar().encode( x='count()', y='Origin:N', color='Origin:N' ).transform_filter( brush & brush2 ) (points.add_params(brush) | points2.add_params(brush2)) & bars To illustrate how a more complex parameter composition can be applied to a conditional encoding, we can return to our heatmap example. Let's construct a scenario where there are two people who can make an interval selection in the same chart. The person Alex makes a selection box when the alt-key (macOS: option-key) is selected and Morgan can make a selection box when the shift-key is selected. Now, we color the chart marks when they fall within Alex's or Morgan's selection, but not both (note that you need to create both selections before seeing the effect). Here, we also use :class:`BrushConfig` to give the selection box of Morgan a different style to be able to tell them apart. ..
altair-plot:: alex = alt.selection_interval( on="[pointerdown[event.altKey], pointerup] > pointermove", empty=False, ) morgan = alt.selection_interval( on="[pointerdown[event.shiftKey], pointerup] > pointermove", mark=alt.BrushConfig(fill="#fdbb84", fillOpacity=0.5, stroke="#e34a33"), empty=False, ) exclusive_or = (alex | morgan) & ~(alex & morgan) alt.Chart(cars).mark_rect().encode( x='Cylinders:O', y='Origin:O', color=alt.when(exclusive_or).then("count()").otherwise(alt.value("grey")), ).add_params( alex, morgan ).properties( width=300, height=180 ) With these operators, selections can be combined in arbitrary ways: - ``alex | morgan``: to select the rectangles that fall inside either Alex's or Morgan's selection. - ``~(alex & morgan)``: to select the rectangles that do not fall inside both Alex's and Morgan's selections (i.e. everything outside their overlap). - ``alex | ~morgan``: to select the rectangles that fall within Alex's selection or outside the selection of Morgan. For more information on how to fine-tune selections, including specifying other mouse and keystroke options, see the `Vega-Lite Selection documentation `_. Also see :class:`BrushConfig` for information on how to customize the appearance of the brush. .. _polars.when: https://docs.pola.rs/py-polars/html/reference/expressions/api/polars.when.html ================================================ FILE: doc/user_guide/internals.rst ================================================ .. currentmodule:: altair .. _user-guide-internals: Altair Internals ================ This section will provide some details about how the Altair API relates to the Vega-Lite visualization specification, and how you can use that knowledge to use the package more effectively. First of all, it is important to realize that when stripped down to its core, Altair itself cannot render visualizations. Altair is an API that does one very well-defined thing: - **Altair provides a Python API for generating validated Vega-Lite specifications** That's it.
In order to take those specifications and turn them into actual visualizations requires a frontend that is correctly set up, but strictly speaking that rendering is generally not controlled by the Altair package. Altair Chart to Vega-Lite Spec ------------------------------ Since Altair is fundamentally about constructing chart specifications, the central functionality of any chart object are the :meth:`~Chart.to_dict` and :meth:`~Chart.to_json` methods, which output the chart specification as a Python dict or JSON string, respectively. For example, here is a simple scatter chart, from which we can output the JSON representation: .. altair-plot:: :output: stdout import altair as alt from altair.datasets import data chart = alt.Chart(data.cars.url).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', ).configure_view( continuousHeight=300, continuousWidth=300, ) print(chart.to_json(indent=2)) Before returning the dict or JSON output, Altair validates it against the `Vega-Lite schema `_ using the `jsonschema `_ package. The Vega-Lite schema defines valid attributes and values that can appear within the specification of a Vega-Lite chart. With the JSON schema in hand, it can then be passed to a library such as `Vega-Embed `_ that knows how to read the specification and render the chart that it describes, and the result is the following visualization: .. altair-plot:: :hide-code: chart Whenever you use Altair within JupyterLab, Jupyter notebook, or other frontends, it is frontend extensions that extract the JSON output from the Altair chart object and pass that specification along to the appropriate rendering code. Altair's Low-Level Object Structure ----------------------------------- The standard API methods used in Altair (e.g. :meth:`~Chart.mark_point`, :meth:`~Chart.encode`, ``configure_*()``, ``transform_*()``, etc.) are higher-level convenience functions that wrap the low-level API. 
That low-level API is essentially a Python object hierarchy that mirrors that of the JSON schema definition. For example, we can choose to avoid the convenience methods and rather construct the above chart using these low-level object types directly: .. altair-plot:: alt.Chart( data=alt.UrlData( url='https://vega.github.io/vega-datasets/data/cars.json' ), mark='point', encoding=alt.FacetedEncoding( x=alt.PositionFieldDef( field='Horsepower', type='quantitative' ), y=alt.PositionFieldDef( field='Miles_per_Gallon', type='quantitative' ), color=alt.StringFieldDefWithCondition( field='Origin', type='nominal' ) ), config=alt.Config( view=alt.ViewConfig( continuousHeight=300, continuousWidth=300 ) ) ) This low-level approach is much more verbose than the typical idiomatic approach to creating Altair charts, but it makes much more clear the mapping between Altair's python object structure and Vega-Lite's schema definition structure. One of the nice features of Altair is that this low-level object hierarchy is not constructed by hand, but rather *programmatically generated* from the Vega-Lite schema, using the ``generate_schema_wrapper.py`` script that you can find in `Altair's repository `_. This auto-generation of code propagates descriptions from the vega-lite schema into the Python class docstrings, from which the `API Reference `_ within Altair's documentation are in turn automatically generated. This means that as the Vega-Lite schema evolves, Altair can very quickly be brought up-to-date, and only the higher-level chart methods need to be updated by hand. Converting Vega-Lite to Altair ------------------------------ With this knowledge in mind, and with a bit of practice, it is fairly straightforward to construct an Altair chart from a Vega-Lite spec. 
For example, consider the `Simple Bar Chart `_ example from the Vega-Lite documentation, which has the following JSON specification:: { "$schema": "https://vega.github.io/schema/vega-lite/v6.json", "description": "A simple bar chart with embedded data.", "data": { "values": [ {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43}, {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53}, {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52} ] }, "mark": {"type": "bar"}, "encoding": { "x": {"field": "a", "type": "ordinal"}, "y": {"field": "b", "type": "quantitative"} } } At the lowest level, we can use the :meth:`~Chart.from_json` class method to construct an Altair chart object from this string of Vega-Lite JSON: .. altair-plot:: import altair as alt alt.Chart.from_json(""" { "$schema": "https://vega.github.io/schema/vega-lite/v6.json", "description": "A simple bar chart with embedded data.", "data": { "values": [ {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43}, {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53}, {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52} ] }, "mark": {"type": "bar"}, "encoding": { "x": {"field": "a", "type": "ordinal"}, "y": {"field": "b", "type": "quantitative"} } } """) Likewise, if you have the Python dictionary equivalent of the JSON string, you can use the :meth:`~Chart.from_dict` method to construct the chart object: .. 
altair-plot:: import altair as alt alt.Chart.from_dict({ "$schema": "https://vega.github.io/schema/vega-lite/v6.json", "description": "A simple bar chart with embedded data.", "data": { "values": [ {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43}, {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53}, {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52} ] }, "mark": {"type": "bar"}, "encoding": { "x": {"field": "a", "type": "ordinal"}, "y": {"field": "b", "type": "quantitative"} } }) With a bit more effort and some judicious copying and pasting, we can manually convert this into more idiomatic Altair code for the same chart, including constructing a pandas dataframe from the data values: .. altair-plot:: import altair as alt import pandas as pd data = pd.DataFrame.from_records([ {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43}, {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53}, {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52} ]) alt.Chart(data).mark_bar().encode( x='a:O', y='b:Q' ) The key is to realize that ``"encoding"`` properties are usually set using the :meth:`~Chart.encode` method, encoding types are usually computed from short-hand type codes, ``"transform"`` and ``"config"`` properties come from the ``transform_*()`` and ``configure_*()`` methods, and so on. This approach is the process by which Altair contributors constructed many of the initial examples in the `Altair Example Gallery `_, drawing inspiration from the `Vega-Lite Example Gallery `_. Becoming familiar with the mapping between Altair and Vega-Lite at this level is useful in making use of the Vega-Lite documentation in places where Altair's documentation is weak or incomplete. ================================================ FILE: doc/user_guide/large_datasets.rst ================================================ .. 
_large-datasets: Large Datasets -------------- If you try to create a plot that will directly embed a dataset with more than 5000 rows, you will see a ``MaxRowsError``: .. altair-plot:: :output: none import altair as alt import pandas as pd data = pd.DataFrame({"x": range(10000)}) alt.Chart(data).mark_point() .. code-block:: none MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). Try enabling the VegaFusion data transformer which raises this limit by pre-evaluating data transformations in Python. >> import altair as alt >> alt.data_transformers.enable("vegafusion") Or, see https://altair-viz.github.io/user_guide/large_datasets.html for additional information on how to plot large datasets. This is not because Altair cannot handle larger datasets, but it is because it is important for the user to think carefully about how large datasets are handled. The following sections describe various considerations as well as approaches to deal with large datasets. If you are certain you would like to embed your full untransformed dataset within the visualization specification, you can disable the ``MaxRows`` check:: alt.data_transformers.disable_max_rows() Challenges ~~~~~~~~~~ By design, Altair does not produce plots consisting of pixels, but plots consisting of data plus a visualization specification. For example, here is a simple chart made from a dataframe with three rows of data: .. altair-plot:: :output: none import altair as alt import pandas as pd data = pd.DataFrame({'x': [1, 2, 3], 'y': [2, 1, 2]}) chart = alt.Chart(data).mark_line().encode( x='x', y='y' ) from pprint import pprint pprint(chart.to_dict()) .. 
code-block:: none {'$schema': 'https://vega.github.io/schema/vega-lite/v2.4.1.json', 'config': {'view': {'height': 300, 'width': 300}}, 'data': {'values': [{'x': 1, 'y': 2}, {'x': 2, 'y': 1}, {'x': 3, 'y': 2}]}, 'encoding': {'x': {'field': 'x', 'type': 'quantitative'}, 'y': {'field': 'y', 'type': 'quantitative'}}, 'mark': 'line'} The resulting specification includes a representation of the data converted to JSON format, and this specification is embedded in the notebook or web page where it can be used by Vega-Lite to render the plot. As the size of the data grows, this explicit data storage can lead to some very large specifications which can have various negative implications: * large notebook files which can slow down your notebook environment such as JupyterLab * if you display the chart on a website it slows down the loading of the page * slow evaluation of transforms as the calculations are performed in JavaScript which is not the fastest language for processing large amounts of data .. _vegafusion-data-transformer: VegaFusion Data Transformer ~~~~~~~~~~~~~~~~~~~~~~~~~~~ The easiest and most flexible approach for addressing a ``MaxRowsError`` is to enable the ``"vegafusion"`` data transformer, which was added in Altair 5.1. `VegaFusion`_ is an external project that provides efficient Rust implementations of most of Altair's data transformations. By evaluating data transformations (e.g. binning and aggregations) in Python, the size of the datasets that must be included in the final chart specification is often greatly reduced. In addition, VegaFusion automatically removes unused columns, which reduces dataset size even for charts without data transformations. When the ``"vegafusion"`` data transformer is active, data transformations will be pre-evaluated when :ref:`displaying-charts`, :ref:`user-guide-saving`, converting charts to dictionaries, and converting charts to JSON.
When combined with :ref:`user-guide-jupyterchart` or the ``"jupyter"`` renderer (See :ref:`customizing-renderers`), data transformations will also be evaluated in Python dynamically in response to chart selection events. VegaFusion's development is sponsored by `Hex `_. Installing VegaFusion ^^^^^^^^^^^^^^^^^^^^^ The VegaFusion dependencies can be installed using pip .. code-block:: none pip install vegafusion or conda .. code-block:: none conda install -c conda-forge vegafusion vl-convert-python Enabling the VegaFusion Data Transformer ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Activate the VegaFusion data transformer with: .. code-block:: python import altair as alt alt.data_transformers.enable("vegafusion") All charts created after activating the VegaFusion data transformer will work with datasets containing up to 100,000 rows. VegaFusion's row limit is applied after all supported data transformations have been applied. So you are unlikely to reach it with a chart such as a histogram, but you may hit it in the case of a large scatter chart or a chart that includes interactivity when not using ``JupyterChart`` or the ``"jupyter"`` renderer. If you need to work with larger datasets, you can disable the maximum row limit or switch to using ``JupyterChart`` or the ``"jupyter"`` renderer described below. Converting to JSON or dictionary ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ When converting a VegaFusion chart to JSON with ``chart.to_json`` or to a Python dictionary with ``chart.to_dict``, the ``format`` argument must be set to ``"vega"`` rather than the default of ``"vega-lite"``. For example: .. code-block:: python chart.to_json(format="vega") chart.to_dict(format="vega") This is because VegaFusion works with Vega chart specifications rather than the Vega-Lite specifications produced by Altair. When the VegaFusion data transformer is enabled, the `vl-convert`_ library is used to perform the conversion from Vega-Lite to Vega. 
Local Timezone Configuration ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Some Altair transformations (e.g. :ref:`user-guide-timeunit-transform`) are based on a local timezone. Normally, the browser's local timezone is used. However, because VegaFusion evaluates these transforms in Python before rendering, it's not always possible to access the browser's timezone. Instead, the local timezone of the Python kernel will be used by default. In the case of a cloud notebook service, this may be different from the browser's local timezone. VegaFusion's local timezone may be customized using the ``vegafusion.set_local_tz`` function. For example: .. code-block:: python import vegafusion as vf vf.set_local_tz("America/New_York") When using ``JupyterChart`` or the ``"jupyter"`` renderer, the browser's local timezone is used. DuckDB Integration ^^^^^^^^^^^^^^^^^^ VegaFusion provides optional integration with `DuckDB`_. Because DuckDB can perform queries on pandas DataFrames without converting through Arrow, it's often faster than VegaFusion's default query engine which requires this conversion. See the `VegaFusion DuckDB`_ documentation for more information. Interactivity ^^^^^^^^^^^^^ When using the default ``"html"`` renderer with charts that use selections to filter data interactively, the VegaFusion data transformer will include all of the data that participates in the interaction in the resulting chart specification. This makes it an unsuitable approach for building interactive charts that filter large datasets (e.g. crossfiltering a dataset with over a million rows). The ``JupyterChart`` widget and the ``"jupyter"`` renderer are designed to work with the VegaFusion data transformer to evaluate data transformations interactively in response to selection events. This avoids the need to transfer the full dataset to the browser, and so supports interactive exploration of aggregated datasets on the order of millions of rows. Either use ``JupyterChart`` directly: ..
code-block:: python import altair as alt alt.data_transformers.enable("vegafusion") ... alt.JupyterChart(chart) Or, enable the ``"jupyter"`` renderer and display charts as usual: .. code-block:: python import altair as alt alt.data_transformers.enable("vegafusion") alt.renderers.enable("jupyter") ... chart Charts rendered this way require a running Python kernel and Jupyter Widget extension to display, which works in many frontends including locally in the classic notebook, JupyterLab, and VSCode, as well as remotely in Colab and Binder. .. _passing-data-by-url: Passing Data by URL ~~~~~~~~~~~~~~~~~~~ A common approach when working with large datasets is to not embed the data directly, but rather store it separately and pass it to the chart by URL. This not only addresses the issue of large notebooks, but also leads to better interactivity performance with large datasets. Local Data Server ^^^^^^^^^^^^^^^^^ A convenient way to do this is by using the `altair_data_server `_ package. It serves your data from a local threaded server. First install the package: .. code-block:: none pip install altair_data_server And then enable the data transformer:: import altair as alt alt.data_transformers.enable('data_server') Note that this approach may not work on some cloud-based Jupyter notebook services. A disadvantage of this method is that if you reopen the notebook, the plot may no longer display as the data server is no longer running. Local Filesystem ^^^^^^^^^^^^^^^^ You can also persist the data to disk and then pass the path to Altair: .. altair-plot:: :output: none url = 'data.json' data.to_json(url, orient='records') chart = alt.Chart(url).mark_line().encode( x='x:Q', y='y:Q' ) pprint(chart.to_dict()) .. 
code-block:: none {'$schema': 'https://vega.github.io/schema/vega-lite/v2.4.1.json', 'config': {'view': {'height': 300, 'width': 300}}, 'data': {'url': 'data.json'}, 'encoding': {'x': {'field': 'x', 'type': 'quantitative'}, 'y': {'field': 'y', 'type': 'quantitative'}}, 'mark': 'line'} Altair also has a ``JSON`` data transformer that will do this transparently when enabled:: alt.data_transformers.enable('json') There is a similar CSV data transformer, but it must be used more carefully because CSV does not preserve data types as JSON does. Note that the filesystem approach may not work on some cloud-based Jupyter notebook services. A disadvantage of this method is also a loss of portability: if the notebook is ever moved, the data file must accompany it or the plot may not display. Vega Datasets ^^^^^^^^^^^^^ If you are working with one of the vega datasets, you can pass the data by URL using the ``url`` attribute: .. code-block:: python from altair.datasets import data source = data.cars.url alt.Chart(source).mark_point() # etc. PNG and SVG Renderers ~~~~~~~~~~~~~~~~~~~~~ The approaches presented in :ref:`passing-data-by-url` have the disadvantage that the data is no longer contained in the notebook and you therefore lose portability or don't see the charts when you reopen the notebook. Furthermore, the data still needs to be sent to the frontend, e.g. your browser, and any calculations will happen there. You might achieve a speedup by enabling either the PNG or SVG renderer as described in :ref:`renderers`. Instead of a Vega-Lite specification, they will prerender the visualization and send only a static image to your notebook. This can greatly reduce the amount of data that is being transmitted. The downside with this approach is that you lose all interactivity features of Altair. Both renderers require you to install the `vl-convert`_ package, see :ref:`saving-png`. ..
_preaggregate-and-filter: Preaggregate and Filter in pandas ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Another common approach is to perform data transformations such as aggregations and filters using pandas before passing the data to Altair. For example, to create a bar chart for the ``barley`` dataset summing up ``yield`` grouped by ``site``, it is convenient to pass the unaggregated data to Altair: .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x="sum(yield):Q", y=alt.Y("site:N").sort("-x") ) The above works well for smaller datasets but let's imagine that the ``barley`` dataset is larger and the resulting Altair chart slows down your notebook environment. To reduce the data being passed to Altair, you could subset the dataframe to only the necessary columns: .. code-block:: python alt.Chart(source[["yield", "site"]]).mark_bar().encode( x="sum(yield):Q", y=alt.Y("site:N").sort("-x") ) You could also precalculate the sum in pandas which would reduce the size of the dataset even more: .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() source_aggregated = ( source.groupby("site")["yield"].sum().rename("sum_yield").reset_index() ) alt.Chart(source_aggregated).mark_bar().encode( x="sum_yield:Q", y=alt.Y("site:N").sort("-x") ) Preaggregate Boxplot ^^^^^^^^^^^^^^^^^^^^ A boxplot is a useful way to visualize the distribution of data and it is simple to create in Altair. .. altair-plot:: import altair as alt from altair.datasets import data df = data.cars() alt.Chart(df).mark_boxplot().encode( x="Miles_per_Gallon:Q", y="Origin:N", color=alt.Color("Origin").legend(None) ) If you have a lot of data, you can perform the necessary calculations in pandas and only pass the resulting summary statistics to Altair. First, let's define a few parameters where ``k`` stands for the multiplier which is used to calculate the boundaries of the whiskers. .. 
altair-plot:: :output: none import altair as alt import pandas as pd from altair.datasets import data k = 1.5 group_by_column = "Origin" value_column = "Miles_per_Gallon" In the next step, we will calculate the summary statistics which are needed for the boxplot. .. altair-plot:: :output: repr :chart-var-name: agg_stats df = data.cars() agg_stats = df.groupby(group_by_column)[value_column].describe() agg_stats["iqr"] = agg_stats["75%"] - agg_stats["25%"] agg_stats["min_"] = agg_stats["25%"] - k * agg_stats["iqr"] agg_stats["max_"] = agg_stats["75%"] + k * agg_stats["iqr"] data_points = df[[value_column, group_by_column]].merge( agg_stats.reset_index()[[group_by_column, "min_", "max_"]] ) # Lowest data point which is still above or equal to min_ # This will be the lower end of the whisker agg_stats["lower"] = ( data_points[data_points[value_column] >= data_points["min_"]] .groupby(group_by_column)[value_column] .min() ) # Highest data point which is still below or equal to max_ # This will be the upper end of the whisker agg_stats["upper"] = ( data_points[data_points[value_column] <= data_points["max_"]] .groupby(group_by_column)[value_column] .max() ) # Store all outliers as a list agg_stats["outliers"] = ( data_points[ (data_points[value_column] < data_points["min_"]) | (data_points[value_column] > data_points["max_"]) ] .groupby(group_by_column)[value_column] .apply(list) ) agg_stats = agg_stats.reset_index() # Show whole dataframe pd.set_option("display.max_columns", 15) print(agg_stats) And finally, we can create the same boxplot as above but we only pass the calculated summary statistics to Altair instead of the full dataset. .. 
altair-plot:: base = alt.Chart(agg_stats).encode( y="Origin:N" ) rules = base.mark_rule().encode( x=alt.X("lower").title("Miles_per_Gallon"), x2="upper", ) bars = base.mark_bar(size=14).encode( x="25%", x2="75%", color=alt.Color("Origin").legend(None), ) ticks = base.mark_tick(color="white", size=14).encode( x="50%" ) outliers = base.transform_flatten( flatten=["outliers"] ).mark_point( style="boxplot-outliers" ).encode( x="outliers:Q", color="Origin", ) rules + bars + ticks + outliers .. _VegaFusion: https://vegafusion.io .. _DuckDB: https://duckdb.org/ .. _VegaFusion DuckDB: https://vegafusion.io/duckdb.html .. _vl-convert: https://github.com/vega/vl-convert ================================================ FILE: doc/user_guide/marks/arc.rst ================================================ .. currentmodule:: altair .. _user-guide-arc-marks: Arc ~~~ Arc marks are circular arcs defined by a center point plus angular and radial extents. Arc marks are typically used for radial plots such as pie and donut charts. Arc Mark Properties ------------------- .. 
altair-plot:: :hide-code: :div_class: properties-example import altair as alt import numpy as np import pandas as pd rad_slider = alt.binding_range(min=0, max=100, step=1) rad_var = alt.param(bind=rad_slider, value=0, name="radius") rad2_slider = alt.binding_range(min=0, max=100, step=1) rad_var2 = alt.param(bind=rad_slider, value=50, name="radius2") theta_slider = alt.binding_range(min=-2 * np.pi, max=2 * np.pi) theta_var = alt.param(bind=theta_slider, value=-0.73, name="theta_single_arc") theta_slider2 = alt.binding_range(min=-2 * np.pi, max=2 * np.pi) theta2_var = alt.param(bind=theta_slider, value=0.73, name="theta2_single_arc") corner_slider = alt.binding_range(min=0, max=50, step=1) corner_var = alt.param(bind=corner_slider, value=0, name="cornerRadius") pad_slider = alt.binding_range(min=0, max=np.pi / 2) pad_var = alt.param(bind=pad_slider, value=0, name="padAngle") source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) c1 = alt.Chart(source, title="Single Arc").mark_arc( radius=rad_var, radius2=rad_var2, theta=theta_var, theta2=theta2_var, cornerRadius=corner_var, padAngle=pad_var, ) c2 = ( alt.Chart(source, title="Stacked Arcs") .mark_arc( radius=rad_var, radius2=rad_var2, cornerRadius=corner_var, padAngle=pad_var, ) .encode( theta=alt.Theta(field="value", type="quantitative"), color=alt.Color(field="category", type="nominal"), ) ) alt.hconcat(c1.properties(width=200), c2.properties(width=200)).add_params( rad_var, rad_var2, theta_var, theta2_var, corner_var, pad_var ) An ``arc`` mark definition can contain any :ref:`standard mark properties ` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: radius radius2 innerRadius outerRadius theta theta2 cornerRadius padAngle radiusOffset radius2Offset thetaOffset theta2Offset Examples -------- We can create a pie chart by encoding ``theta`` or ``color`` arc marks. .. 
altair-plot:: import pandas as pd import altair as alt source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) alt.Chart(source).mark_arc().encode( theta=alt.Theta(field="value", type="quantitative"), color=alt.Color(field="category", type="nominal"), ) Setting ``innerRadius`` to non-zero values will create a donut chart. .. altair-plot:: import pandas as pd import altair as alt source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) alt.Chart(source).mark_arc(innerRadius=50).encode( theta=alt.Theta(field="value", type="quantitative"), color=alt.Color(field="category", type="nominal"), ) You can also add a text layer to add labels to a pie chart. .. altair-plot:: import pandas as pd import altair as alt source = pd.DataFrame( {"category": ["a", "b", "c", "d", "e", "f"], "value": [4, 6, 10, 3, 7, 8]} ) base = alt.Chart(source).encode( theta=alt.Theta("value:Q").stack(True), color=alt.Color("category:N").legend(None), ) pie = base.mark_arc(outerRadius=120) text = base.mark_text(radius=140, size=20).encode( text="category:N" ) pie + text ================================================ FILE: doc/user_guide/marks/area.rst ================================================ .. currentmodule:: altair .. _user-guide-area-marks: Area ~~~~~~~~~~ An ``area`` mark represents multiple data elements as a single area shape. Area marks are often used to show change over time, using either a single area or stacked areas. Area Mark Properties -------------------- ..
altair-plot:: :hide-code: :div_class: properties-example import altair as alt import pandas as pd interpolate_select = alt.binding_select( options=[ "basis", "cardinal", "catmull-rom", "linear", "monotone", "natural", "step", "step-after", "step-before", ], name="interpolate", ) interpolate_var = alt.param(bind=interpolate_select, value="linear") tension_slider = alt.binding_range(min=0, max=1, step=0.05, name="tension") tension_var = alt.param(bind=tension_slider, value=0) source = pd.DataFrame({"u": [1, 2, 3, 4, 5, 6], "v": [28, 55, 42, 34, 36, 38]}) alt.Chart(source).mark_area(interpolate=interpolate_var, tension=tension_var).encode( x="u", y="v" ).add_params(interpolate_var, tension_var) An ``area`` mark definition can contain any :ref:`standard mark properties ` and the following line interpolation as well as line and point overlay properties: .. altair-object-table:: altair.MarkDef :properties: align baseline orient interpolate tension line point Examples -------- Area Chart ^^^^^^^^^^ Using ``area`` mark with one temporal or ordinal field (typically on ``x``) and one quantitative field (typically on ``y``) produces an area chart. For example, the following area chart shows the number of unemployed people in the US over time. .. altair-plot:: import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url alt.Chart(source).mark_area().encode( x="yearmonth(date):T", y="sum(count):Q", ).properties(width=300, height=200) Area Chart with Overlaying Lines and Point Markers ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ By setting ``line`` and ``point`` properties of the mark definition to ``true`` or an object defining a property of the overlaying point marks, we can overlay line and point markers on top of area. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.stocks.url alt.Chart(source).mark_area(line=True, point=True).encode( x="date:T", y="price:Q", ).transform_filter( alt.datum.symbol == "GOOG" ) Instead of using a single color as the fill color of the area, we can set it to a gradient. In this example, we are also customizing the overlay. For more information about gradient options see the `Vega-Lite Gradient documentation `_. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).transform_filter(alt.datum.symbol == "GOOG").mark_area( line={"color": "darkgreen"}, color=alt.Gradient( gradient="linear", stops=[ alt.GradientStop(color="white", offset=0), alt.GradientStop(color="darkgreen", offset=1), ], x1=1, x2=1, y1=1, y2=0, ), ).encode( alt.X("date:T"), alt.Y("price:Q"), ) Stacked Area Chart ^^^^^^^^^^^^^^^^^^ Adding a color field to area chart creates stacked area chart by default. For example, here we split the area chart by industry. .. altair-plot:: import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url alt.Chart(source).mark_area().encode( alt.X("yearmonth(date):T").axis(format="%Y", domain=False, tickSize=0), alt.Y("sum(count):Q"), alt.Color("series:N").scale(scheme="category20b"), ) Normalized Stacked Area Chart ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can also create a normalized stacked area chart by setting ``stack`` to ``"normalize"`` in the encoding channel. Here we can easily see the percentage of unemployment across industries. .. 
altair-plot:: import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url alt.Chart(source).mark_area().encode( alt.X("yearmonth(date):T").axis(format="%Y", domain=False, tickSize=0), alt.Y("sum(count):Q").stack("normalize"), alt.Color("series:N").scale(scheme="category20b"), ) Streamgraph ^^^^^^^^^^^ We can also shift the stacked area chart's baseline to center and produce a streamgraph by setting ``stack`` to ``"center"`` in the encoding channel. Adding the ``interactive`` method allows for zooming and panning the x-scale. .. altair-plot:: import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url alt.Chart(source).mark_area().encode( alt.X("yearmonth(date):T").axis(format="%Y", domain=False, tickSize=0), alt.Y("sum(count):Q").stack("center").axis(None), alt.Color("series:N").scale(scheme="category20b"), ).interactive() Ranged Area ^^^^^^^^^^^ Specifying ``X2`` or ``Y2`` for the quantitative axis of area marks produces ranged areas. For example, we can use ranged area to highlight the minimum and maximum measured temperatures over time, aggregated by ``monthdate``. .. altair-plot:: import altair as alt from altair.datasets import data source = data.seattle_weather() alt.Chart(source).mark_area(opacity=0.7).encode( alt.X("monthdate(date):T").title("Date"), alt.Y("mean(temp_max):Q").title("Daily Temperature Range (C)"), alt.Y2("mean(temp_min):Q"), ).properties(width=600, height=300) ================================================ FILE: doc/user_guide/marks/bar.rst ================================================ .. currentmodule:: altair .. _user-guide-bar-marks: Bar ~~~ Bar marks are useful in many visualizations, including bar charts, stacked bar charts, and timelines. Bar Mark Properties ------------------- ..
altair-plot:: :hide-code: :div_class: properties-example import altair as alt import pandas as pd corner_slider = alt.binding_range(min=0, max=50, step=1) corner_var = alt.param(bind=corner_slider, value=0, name="cornerRadius") source = pd.DataFrame( { "a": ["A", "B", "C", "D", "E", "F", "G", "H", "I"], "b": [28, 55, 43, 91, 81, 53, 19, 87, 52], } ) alt.Chart(source).mark_bar(cornerRadius=corner_var).encode( x=alt.X("a:N").axis(labelAngle=0), y="b:Q", ).add_params(corner_var) A ``bar`` mark definition can contain any :ref:`standard mark properties ` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: width height orient align baseline binSpacing cornerRadius cornerRadiusEnd cornerRadiusTopLeft cornerRadiusTopRight cornerRadiusBottomRight cornerRadiusBottomLeft Examples -------- Single Bar Chart ^^^^^^^^^^^^^^^^ Mapping a quantitative field to either ``x`` or ``y`` of the ``bar`` mark produces a single bar chart. .. altair-plot:: import altair as alt from altair import datum from altair.datasets import data source = data.population.url alt.Chart(source).mark_bar().encode( alt.X("sum(people):Q").title("Population") ).transform_filter( datum.year == 2000 ) Bar Chart ^^^^^^^^^ If we map a different discrete field to the ``y`` channel, we can produce a horizontal bar chart. Specifying ``alt.Step(20)`` will adjust the bar's height per discrete step. .. altair-plot:: import altair as alt from altair import datum from altair.datasets import data source = data.population.url alt.Chart(source).mark_bar().encode( alt.X("sum(people):Q").title("Population"), alt.Y("age:O"), ).transform_filter( datum.year == 2000 ).properties(height=alt.Step(20)) Bar Chart with a Temporal Axis ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ While the ``bar`` mark typically uses the ``x`` and ``y`` channels to encode a pair of discrete and continuous fields, it can also be used with continuous fields on both channels. 
For example, given a bar chart with a temporal field on ``x``, we can see that the x-scale is a continuous scale. By default, the size of bars on continuous scales will be set based on the ``continuousBandSize`` config. .. altair-plot:: import altair as alt from altair.datasets import data source = data.seattle_weather() alt.Chart(source).mark_bar().encode( alt.X("month(date):T").title("Date"), alt.Y("mean(precipitation):Q"), ) Histograms ^^^^^^^^^^ If the data is not pre-aggregated (i.e. each record in the data field represents one item), mapping a binned quantitative field to ``x`` and aggregate ``count`` to ``y`` produces a histogram. .. altair-plot:: import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_bar().encode( alt.X("IMDB Rating:Q").bin(), y='count()', ) Stacked Bar Chart ^^^^^^^^^^^^^^^^^ Adding color to the bar chart (by using the ``color`` attribute) creates a stacked bar chart by default. Here we also customize the color’s scale range to make the color a little nicer. (See ``stack`` for more details about customizing stack.) .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x="variety", y="sum(yield)", color="site" ) Grouped Bar Chart with Offset ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame( { "category": ["A", "A", "B", "B", "C", "C"], "group": ["x", "y", "z", "x", "y", "z"], "value": [0.1, 0.6, 0.9, 0.7, 0.2, 0.6], } ) alt.Chart(source).mark_bar().encode( x=alt.X("category:N"), xOffset="group:N", y=alt.Y("value:Q"), color=alt.Color("group:N"), ) ================================================ FILE: doc/user_guide/marks/boxplot.rst ================================================ .. currentmodule:: altair .. _user-guide-boxplot-marks: Box Plot ~~~~~~~~~ A box plot summarizes a distribution of quantitative values using a set of summary statistics. 
The median tick in the box represents the median. The lower and upper parts of the box represent the first and third quartile respectively. Depending on the type of box plot, the ends of the whiskers can represent multiple things. To create a box plot, use the ``mark_boxplot`` method. Box Plot Mark Properties ^^^^^^^^^^^^^^^^^^^^^^^^ A box plot's mark definition can contain the following properties: .. altair-object-table:: altair.BoxPlotDef :properties: extent orient size color opacity Besides the properties listed above, ``box``, ``median``, ``rule``, ``outliers``, and ``ticks`` can be used to specify the underlying mark properties for different parts of the box plots as well. Types of Box Plot ^^^^^^^^^^^^^^^^^ Altair supports two types of box plots, defined by the ``extent`` property in the mark definition object. 1. Tukey Box Plot is the default box plot in Altair. For a Tukey box plot, the whisker spans from the smallest data to the largest data within the range [Q1 - k * IQR, Q3 + k * IQR] where Q1 and Q3 are the first and third quartiles while IQR is the interquartile range (Q3-Q1). In this type of box plot, you can specify the constant k by setting the ``extent``. If there are outlier points beyond the whisker, they will be displayed using point marks. By default, the extent is ``1.5``. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_boxplot().encode( alt.X("Miles_per_Gallon:Q").scale(zero=False) ) 2. ``min-max`` Box Plot is a box plot where the lower and upper whiskers are defined as the min and max respectively. No points will be considered as outliers for this type of box plots. .. 
altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_boxplot(extent="min-max").encode( alt.X("Miles_per_Gallon:Q").scale(zero=False), alt.Y("Origin:N"), ) Dimension and Orientation ^^^^^^^^^^^^^^^^^^^^^^^^^ Altair supports both 1D and 2D box plots: 1D box plot shows the distribution of a continuous field. A box plot’s orientation is automatically determined by the continuous field axis. For example, you can create a vertical 1D box plot by encoding a continuous field on the y axis. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_boxplot().encode( alt.Y("Miles_per_Gallon:Q").scale(zero=False) ) 2D box plot shows the distribution of a continuous field, broken down by categories. For 2D box plots with one continuous field and one discrete field, the box plot will be horizontal if the continuous field is on the x axis. Color, Size, and Opacity Encoding Channels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can customize the color, size, and opacity of the box in the box plot by using the ``color``, ``size``, and ``opacity`` encoding channels. The ``size`` is applied to only the box and median tick. The ``color`` is applied to only the box and the outlier points. Meanwhile, the ``opacity`` is applied to the whole ``boxplot``. An example of a box plot where the ``color`` encoding channel is specified. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_boxplot(extent="min-max").encode( alt.X("Origin:N"), alt.Y("Miles_per_Gallon:Q").scale(zero=False), alt.Color("Origin:N").legend(None), ) Tooltip Encoding Channels ^^^^^^^^^^^^^^^^^^^^^^^^^ You can add custom tooltips to box plots. The custom tooltip will override the default box plot's tooltips. If the field in the tooltip encoding is unaggregated, it replaces the tooltips of the outlier marks.
On the other hand, if the field in the tooltip encoding is aggregated, it replaces the tooltips of the box and whisker marks. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_boxplot(extent="min-max").encode( alt.X("Miles_per_Gallon:Q").scale(zero=False), alt.Y("Origin:N"), alt.Tooltip("mean(Miles_per_Gallon)"), ) ================================================ FILE: doc/user_guide/marks/circle.rst ================================================ .. currentmodule:: altair .. _user-guide-circle-marks: Circle ~~~~~~ ``circle`` mark is similar to ``point`` mark, except that (1) the ``shape`` value is always set to ``circle`` (2) they are filled by default. Circle Mark Properties ^^^^^^^^^^^^^^^^^^^^^^ A ``circle`` mark definition can contain any :ref:`standard mark properties ` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: size Scatter Plot with Circle ^^^^^^^^^^^^^^^^^^^^^^^^ Here is an example scatter plot with ``circle`` marks: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url alt.Chart(source).mark_circle().encode( x=("Horsepower:Q"), y=("Miles_per_Gallon:Q"), ) ================================================ FILE: doc/user_guide/marks/errorband.rst ================================================ .. currentmodule:: altair .. _user-guide-errorband-marks: Error Band ~~~~~~~~~~ An error band summarizes an error range of quantitative values using a set of summary statistics, representing by area. Error band in Altair can either be used to aggregate raw data or directly visualize aggregated data. To create an error band, use ``mark_errorband``. Error Band Mark Properties ^^^^^^^^^^^^^^^^^^^^^^^^^^ An ``errorband`` mark definition can contain the following properties: .. 
altair-object-table:: altair.ErrorBandDef :properties: extent orient color opacity interpolate tension Besides the properties listed above, ``band`` and ``borders`` can be used to specify the underlying mark properties for different parts of the error band as well. Comparing the usage of Error Band to the usage of Error Bar ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ All the properties and usage of error band are identical to error bar’s, except the ``band`` and ``borders`` that replace the error bar’s ``rule`` and ``ticks``. **Error Band** .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url alt.Chart(source).mark_errorband(extent="ci", borders=True).encode( x="year(Year)", y=alt.Y( "Miles_per_Gallon:Q", scale=alt.Scale(zero=False), title="Miles per Gallon (95% CIs)", ), ) **Error Bar** .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url alt.Chart(source).mark_errorbar(extent="ci", ticks=True).encode( x="year(Year)", y=alt.Y( "Miles_per_Gallon:Q", scale=alt.Scale(zero=False), title="Miles per Gallon (95% CIs)", ), ) Using Error Band to Aggregate Raw Data ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If the data is not aggregated yet, Altair will aggregate the data based on the ``extent`` properties in the mark definition as done in the error band showing confidence interval above. All other ``extent`` values are defined in Error Bar. Using Error Band to Visualize Aggregated Data ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1. Data is aggregated with low and high values of the error band If the data is already pre-aggregated with low and high values of the error band, you can directly specify ``x`` and ``x2`` (or ``y`` and ``y2``) to use error band as a ranged mark. ..
altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame( { "ci1": [23.5007, 25.8214, 26.4472, 27.7074], "ci0": [19.6912, 20.8554, 21.9749, 22.6203], "center": [21.5735, 23.3750, 24.0611, 25.0931], "Year": [189302400000, 220924800000, 252460800000, 283996800000], } ) band = alt.Chart(source).mark_errorband().encode( alt.Y( "ci1:Q", scale=alt.Scale(zero=False), title="Mean of Miles per Gallon (95% CIs)" ), alt.Y2("ci0:Q"), alt.X("year(Year)"), ) line = alt.Chart(source).mark_line().encode( alt.Y("center:Q"), alt.X("year(Year)") ) band + line 2. Data is aggregated with center and error value(s) If the data is already pre-aggregated with center and error values of the error band, you can use ``x/y``, ``x/yError``, and ``x/yError2`` as defined in Error Bar. Dimension ^^^^^^^^^ Altair supports both 1D and 2D error bands: A **1D error band** shows the error range of a continuous field; it can be used to show the global error range of the whole plot. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url band = alt.Chart(source).mark_errorband(extent="stdev").encode( alt.Y("Miles_per_Gallon:Q").title("Miles per Gallon") ) points = alt.Chart(source).mark_point().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", ) band + points A **2D error band** shows the error range of a continuous field for each dimension value such as year. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() line = alt.Chart(source).mark_line().encode( x="Year", y="mean(Miles_per_Gallon)" ) band = alt.Chart(source).mark_errorband(extent="ci").encode( x="Year", y=alt.Y("Miles_per_Gallon").title("Miles/Gallon"), ) band + line Color and Opacity Encoding Channels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can customize the color and opacity of the bands by using the ``color`` and ``opacity`` encoding channels.
Here is an example of an ``errorband`` with the ``color`` encoding channel set to ``alt.value('black')``. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url alt.Chart(source).mark_errorband(extent="ci", borders=True).encode( x="year(Year)", y=alt.Y("Miles_per_Gallon:Q") .scale(zero=False) .title("Miles per Gallon (95% CIs)"), color=alt.value("black") ) ================================================ FILE: doc/user_guide/marks/errorbar.rst ================================================ .. currentmodule:: altair .. _user-guide-errorbar-marks: Error Bar ~~~~~~~~~~ An error bar summarizes an error range of quantitative values using a set of summary statistics, represented by rules (and optional end ticks). Error bars in Altair can either be used to aggregate raw data or directly visualize aggregated data. To create an error bar, use ``mark_errorbar``. Error Bar Mark Properties ^^^^^^^^^^^^^^^^^^^^^^^^^ An ``errorbar`` mark definition can contain the following properties: .. altair-object-table:: altair.ErrorBarDef :properties: extent orient color opacity Besides the properties listed above, ``rule`` and ``ticks`` can be used to specify the underlying mark properties for different parts of the error bar as well. Using Error Bars to Aggregate Raw Data ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If the data is not aggregated yet, Altair will aggregate the data based on the ``extent`` properties in the mark definition. 1. **Error bars showing standard error** is the default error bar in Vega-Lite. It can also be explicitly specified by setting ``extent`` to ``"stderr"``. The length of lower and upper rules represent standard error. By default, the rule marks expand from the mean. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar().encode( x=alt.X('yield:Q').scale(zero=False), y=alt.Y('variety:N') ) points = alt.Chart(source).mark_point( filled=True, color="black", ).encode( x=alt.X("mean(yield)"), y=alt.Y("variety:N"), ) error_bars + points 2. **Error bar showing standard deviation** can be specified by setting ``extent`` to ``"stdev"``. For this type of error bar, the length of lower and upper rules represent standard deviation. Like an error bar that shows Standard Error, the rule marks expand from the mean by default. .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(extent="stdev").encode( x=alt.X("yield:Q").scale(zero=False), y=alt.Y("variety:N"), ) points = alt.Chart(source).mark_point(filled=True, color="black").encode( x=alt.X("mean(yield)"), y=alt.Y("variety:N"), ) error_bars + points 3. **Error bars showing interquartile range** can be specified by setting ``extent`` to ``"iqr"``. For this type of error bar, the rule marks expand from the first quartile to the third quartile. .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(extent="iqr").encode( x=alt.X("yield:Q").scale(zero=False), y=alt.Y("variety:N"), ) points = alt.Chart(source).mark_point( filled=True, color="black" ).encode( x=alt.X("mean(yield)"), y=alt.Y("variety:N"), ) error_bars + points Using Error Bars to Visualize Aggregated Data ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1. Data is aggregated with low and high values of the error bars If the data is already pre-aggregated with low and high values of the error bars, you can directly specify ``x`` and ``x2`` (or ``y`` and ``y2``) to use error bar as a ranged mark. .. 
altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame({ "lower_yield": [23.1311, 23.9503, 24.7778, 21.7823], "upper_yield": [43.5522, 38.9775, 46.9167, 48.9732], "center": [32.4, 30.96667, 33.966665, 30.45], "variety": ["Glabron", "Manchuria", "No. 457", "No. 462"], }) bar = alt.Chart(source).mark_errorbar().encode( alt.X("upper_yield:Q").scale(zero=False).title("yield"), alt.X2("lower_yield:Q"), alt.Y("variety:N"), ) point = alt.Chart(source).mark_point( filled=True, color="black" ).encode( alt.X("center:Q"), alt.Y("variety:N") ) point + bar 2. Data is aggregated with center and error value(s) If the data is already pre-aggregated with center and error values of the error bars, you can directly specify ``x`` as center, ``xError`` and ``xError2`` as error values extended from center (or ``y``, ``yError``, and ``yError2``). If ``x/yError2`` is omitted, error bars have symmetric error values. .. altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame({ "yield_error": [7.5522, 6.9775, 3.9167, 11.9732], "yield_center": [32.4, 30.96667, 33.966665, 30.45], "variety": ["Glabron", "Manchuria", "No. 457", "No. 462"], }) bar = alt.Chart(source).mark_errorbar().encode( x=alt.X("yield_center:Q").scale(zero=False).title("yield"), xError=("yield_error:Q"), y=alt.Y("variety:N"), ) point = alt.Chart(source).mark_point( filled=True, color="black" ).encode( alt.X("yield_center:Q"), alt.Y("variety:N"), ) point + bar **Note** if the error is pre-aggregated with asymmetric error values, one of ``x/yError`` and ``x/yError2`` has to be a positive value and the other has to be a negative value. Dimension & Orientation ^^^^^^^^^^^^^^^^^^^^^^^ Altair supports both 1D and 2D error bars: A **1D error bar** shows the error range of a continuous field. The orientation of an error bar is automatically determined by the continuous field axis. For example, you can create a vertical 1D error bar by encoding a continuous field on the y axis. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar().encode( alt.Y("yield:Q").scale(zero=False) ) points = alt.Chart(source).mark_point( filled=True, color="black" ).encode( alt.Y("mean(yield)") ) error_bars + points A **2D error bar** shows the error range of a continuous field, broken down by categories. For 2D error bars with one continuous field and one discrete field, the error bars will be horizontal if the continuous field is on the x axis. Alternatively, if the continuous field is on the y axis, the error bar will be vertical. .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(extent="stdev").encode( alt.Y("yield:Q").scale(zero=False), alt.X("variety:N"), ) points = alt.Chart(source).mark_point( filled=True, color="black", ).encode( alt.Y("mean(yield)"), alt.X("variety:N"), ) error_bars + points Color and Opacity Encoding Channels ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can customize the color and opacity of the bars by using the ``color`` and ``opacity`` encoding channels. Here is an example of an ``errorbar`` with the ``color`` encoding channel set to ``alt.value("#4682b4")``. .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(ticks=True).encode( alt.X("yield:Q").scale(zero=False), alt.Y("variety:N"), color=alt.value("#4682b4"), ) points = alt.Chart(source).mark_point( filled=True, color="black" ).encode( alt.X("mean(yield)"), alt.Y("variety:N"), ) error_bars + points Tooltip Encoding Channels ^^^^^^^^^^^^^^^^^^^^^^^^^ You can add custom tooltips to error bars. The custom tooltip will override the default error bar’s tooltips. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_errorbar().encode( alt.X("yield:Q").scale(zero=False), alt.Y("variety:N"), tooltip="variety:N", ) ================================================ FILE: doc/user_guide/marks/geoshape.rst ================================================ .. currentmodule:: altair .. _user-guide-geoshape-marks: Geoshape ^^^^^^^^^^^^^ ``mark_geoshape`` represents arbitrary shapes whose geometry is determined by specified spatial data. Geoshape Mark Properties ^^^^^^^^^^^^^^^^^^^^^^^^ A ``geoshape`` mark can contain any :ref:`standard mark properties <mark-properties>`. Basic Map ^^^^^^^^^ Altair can work with many different geographical data formats, including geojson and topojson files. Often, the most convenient input format to use is a ``GeoDataFrame``. Here we load the Natural Earth 110m Cultural Vectors dataset and create a basic map using ``mark_geoshape``: .. altair-plot:: import altair as alt from altair.datasets import data import geopandas as gpd url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip" gdf_ne = gpd.read_file(url) # zipped shapefile gdf_ne = gdf_ne[["NAME", "CONTINENT", "POP_EST", 'geometry']] alt.Chart(gdf_ne).mark_geoshape() In the example above, Altair applies a default blue ``fill`` color and uses a default map projection (``equalEarth``). We can customize the colors and boundary stroke widths using standard mark properties. Using the ``project`` method we can also define a custom map projection manually: .. altair-plot:: alt.Chart(gdf_ne).mark_geoshape( fill='lightgrey', stroke='white', strokeWidth=0.5 ).project( type='albers' ) Focus & Filtering ^^^^^^^^^^^^^^^^^ By default Altair automatically adjusts the projection so that all the data fits within the width and height of the chart. Multiple approaches can be used to focus on specific regions of your spatial data. Namely: 1. Filter the source data within your GeoDataFrame. 2.
Filter the source data using a ``transform_filter``. 3. Specify ``scale`` (zoom level) and ``translate`` (panning) within the ``project`` method. 4. Specify ``fit`` (extent) within the ``project`` method & ``clip=True`` in the mark properties. The following examples apply these approaches to focus on continental Africa: 1. Filter the source data within your GeoDataFrame: .. altair-plot:: gdf_sel = gdf_ne.query("CONTINENT == 'Africa'") alt.Chart(gdf_sel).mark_geoshape() 2. Filter the source data using a ``transform_filter``: .. altair-plot:: alt.Chart(gdf_ne).mark_geoshape().transform_filter( alt.datum.CONTINENT == 'Africa' ) 3. Specify ``scale`` (zoom level) and ``translate`` (panning) within the ``project`` method: .. altair-plot:: alt.Chart(gdf_ne).mark_geoshape().project( scale=200, translate=[160, 160] # lon, lat ) 4. Specify ``fit`` (extent) within the ``project`` method & ``clip=True`` in the mark properties: .. altair-plot:: extent_roi = gdf_ne.query("CONTINENT == 'Africa'") xmin, ymin, xmax, ymax = extent_roi.total_bounds # fit object should be a GeoJSON-like Feature or FeatureCollection extent_roi_feature = { "type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[ [xmax, ymax], [xmax, ymin], [xmin, ymin], [xmin, ymax], [xmax, ymax]]]}, "properties": {} } alt.Chart(gdf_ne).mark_geoshape(clip=True).project( fit=extent_roi_feature ) Cartesian coordinates ^^^^^^^^^^^^^^^^^^^^^ The default projection of Altair is ``equalEarth``, which accurately represents the areas of the world's landmasses relative to each other. This default assumes that your geometries are in degrees and referenced by longitude and latitude values. Another widely used coordinate system for data visualization is the 2d cartesian coordinate system. This coordinate system does not take into account the curvature of the Earth. In the following example the input geometry is not projected and is instead rendered directly in raw coordinates using the ``identity`` projection type.
We have to define the ``reflectY`` as well since Canvas and SVG treats positive ``y`` as pointing down. .. altair-plot:: alt.Chart(gdf_sel).mark_geoshape().project( type='identity', reflectY=True ) .. note:: When working with spatial data, it's important to be aware of coordinate reference systems and geometry winding order. For detailed information on projections and winding order, see the :ref:`Spatial Data ` section in the data guide. Mapping Polygons ^^^^^^^^^^^^^^^^ The following example maps the visual property of the ``NAME`` column using the ``color`` encoding. .. altair-plot:: alt.Chart(gdf_sel).mark_geoshape().encode( color='NAME:N' ) Since each country is represented by a (multi)polygon, we can separate the ``stroke`` and ``fill`` definitions as such: .. altair-plot:: alt.Chart(gdf_sel).mark_geoshape( stroke='white', strokeWidth=1.5 ).encode( fill='NAME:N' ) Mapping Lines ^^^^^^^^^^^^^ By default Altair assumes for ``mark_geoshape`` that the mark's color is used for the fill color instead of the stroke color. This means that if your source data contain (multi)lines, you will have to explicitly define the ``filled`` value as ``False``. Compare: .. altair-plot:: gs_line = gpd.GeoSeries.from_wkt(['LINESTRING (0 0, 1 1, 0 2, 2 2, -1 1, 1 0)']) alt.Chart(gs_line).mark_geoshape().project( type='identity', reflectY=True ) With: .. altair-plot:: gs_line = gpd.GeoSeries.from_wkt(['LINESTRING (0 0, 1 1, 0 2, 2 2, -1 1, 1 0)']) alt.Chart(gs_line).mark_geoshape( filled=False ).project( type='identity', reflectY=True ) Using this approach one can also style Polygons as if they are Linestrings: .. altair-plot:: alt.Chart(gdf_sel).mark_geoshape( filled=False, strokeWidth=1.5 ).encode( stroke='NAME:N' ) Mapping Points ^^^^^^^^^^^^^^ Points can be drawn when they are defined as ``Points`` within a GeoDataFrame using ``mark_geoshape``. We first assign the centroids of Polygons as Point geometry and plot these: .. 
altair-plot:: # .copy() to prevent changing the original `gdf_sel` variable # derive centroid in a projected CRS (in meters) and visualize in a geographic CRS (in degrees). gdf_centroid = gpd.GeoDataFrame( data=gdf_sel.copy(), geometry=gdf_sel.geometry.to_crs(epsg=3857).centroid.to_crs(epsg=4326) ) alt.Chart(gdf_centroid).mark_geoshape() Caveat: To use the ``size`` encoding for the Points you will need to use the ``mark_circle`` in combination with the ``latitude`` and ``longitude`` encoding channel definitions. .. altair-plot:: gdf_centroid["lon"] = gdf_centroid.geometry.x gdf_centroid["lat"] = gdf_centroid.geometry.y alt.Chart(gdf_centroid).mark_circle().encode( longitude="lon:Q", latitude="lat:Q", size="POP_EST:Q" ) Altair also contains expressions related to geographical features. We can for example define the ``centroids`` using a ``geoCentroid`` expression: .. altair-plot:: basemap = alt.Chart(gdf_sel).mark_geoshape( fill='lightgray', stroke='white', strokeWidth=0.5 ) bubbles = alt.Chart(gdf_sel).transform_calculate( centroid=alt.expr.geoCentroid(None, alt.datum) ).mark_circle( stroke='black' ).encode( longitude='centroid[0]:Q', latitude='centroid[1]:Q', size="POP_EST:Q" ) (basemap + bubbles).project( type='identity', reflectY=True ) Choropleths ^^^^^^^^^^^ An alternative to showing the population sizes as bubbles is to create a "Choropleth" map. These are geographical heatmaps where the color of each region is mapped to the values of a column in the dataframe. .. altair-plot:: alt.Chart(gdf_sel).mark_geoshape().encode( color='POP_EST' ) When we create choropleth maps, we need to be careful, because although the color changes according to the value of the column we are interested in, the size is tied to the area of each country and we might miss interesting values in small countries just because we can't easily see them on the map (e.g. if we were to visualize population density).
Lookup datasets ^^^^^^^^^^^^^^^ Sometimes your data is separated in two datasets. One ``DataFrame`` with the data and one ``GeoDataFrame`` with the geometries. In this case you can use the ``lookup`` transform to collect related information from the other dataset. You can use the ``lookup`` transform in two directions: 1. Use a ``GeoDataFrame`` with geometries as source and lookup related information in another ``DataFrame``. 2. Use a ``DataFrame`` as source and lookup related geometries in a ``GeoDataFrame``. Depending on your use-case one or the other is more favorable. First we show an example of the first approach. Here we lookup the field ``rate`` from the ``df_us_unemp`` DataFrame, where the ``gdf_us_counties`` GeoDataFrame is used as source: .. altair-plot:: import altair as alt from altair.datasets import data gdf_us_counties = data.us_10m(layer="counties") df_us_unemp = data.unemployment() alt.Chart(gdf_us_counties).mark_geoshape().transform_lookup( lookup='id', from_=alt.LookupData(data=df_us_unemp, key='id', fields=['rate']) ).encode( alt.Color('rate:Q') ).project( type='albersUsa' ) Next, we show an example of the second approach. Here we lookup the geometries through the fields ``geometry`` and ``type`` from the ``gdf_us_counties`` GeoDataFrame, where the ``df_us_unemp`` DataFrame is used as source. .. altair-plot:: alt.Chart(df_us_unemp).mark_geoshape().transform_lookup( lookup='id', from_=alt.LookupData(data=gdf_us_counties, key='id', fields=['geometry', 'type']) ).encode( alt.Color('rate:Q') ).project( type='albersUsa' ) Choropleth Classification ^^^^^^^^^^^^^^^^^^^^^^^^^ In addition to displaying a continuous quantitative variable, choropleths can also be used to show discrete levels of a variable. While we should generally be careful to not create artificial groups when discretizing a continuous variable, it can be very useful when we have natural cutoff levels of a variable that we want to showcase clearly. 
We first define a utility function ``classify()`` that we will use to showcase different approaches to make a choropleth map. We apply it to define a choropleth map of the unemployment statistics of 2018 of US counties using a ``linear`` scale. .. altair-plot:: import altair as alt from altair.datasets import data def classify(type, domain=None, nice=False, title=None): # define data us_counties = alt.topo_feature(data.us_10m.url, "counties") us_unemp = data.unemployment.url # define choropleth scale if "threshold" in type: scale = alt.Scale(type=type, domain=domain, scheme="inferno") else: scale = alt.Scale(type=type, nice=nice, scheme="inferno") # define title if title is None: title = type # define choropleth chart choropleth = ( alt.Chart(us_counties, title=title) .mark_geoshape() .transform_lookup( lookup="id", from_=alt.LookupData(data=us_unemp, key="id", fields=["rate"]) ) .encode( alt.Color( "rate:Q", scale=scale, legend=alt.Legend( direction="horizontal", orient="bottom", format=".1%" ), ) ) .project(type="albersUsa") ) return choropleth classify(type='linear') We visualize the unemployment ``rate`` in percentage of 2018 with a ``linear`` scale range using a ``mark_geoshape()`` to present the spatial patterns on a map. Each value/ county has defined a `unique` color. This gives a bit of insight, but often we like to group the distribution into classes. By grouping values in classes, you can classify the dataset so all values/geometries in each class get assigned the same color. Here we present a number of scale methods how Altair can be used: - ``quantile``, this type will divide your dataset (`domain`) into intervals of similar sizes. Each class contains more or less the same number of values/geometries (`equal counts`). The scale definition will look as follow: .. code:: python alt.Scale(type='quantile') And applied in our utility function: .. 
altair-plot:: classify(type='quantile', title=['quantile', 'equal counts']) - ``quantize``, this type will divide the extent of your dataset (`range`) in equal intervals. Each class contains a different number of values, but the step size is equal (`equal range`). The scale definition will look as follows: .. code:: python alt.Scale(type='quantize') And applied in our utility function: .. altair-plot:: classify(type='quantize', title=['quantize', 'equal range']) The ``quantize`` method can also be used in combination with ``nice``. This will `"nice"` the domain before applying quantization. As such: .. code:: python alt.Scale(type='quantize', nice=True) And applied in our utility function: .. altair-plot:: classify(type='quantize', nice=True, title=['quantize', 'equal range nice']) - ``threshold``, this type will divide your dataset in separate classes by manually specifying the cut values. Classes are separated by the defined cut values. The scale definition will look as follows: .. code:: python alt.Scale(type='threshold', domain=[0.05, 0.20]) And applied in our utility function: .. altair-plot:: classify(type='threshold', domain=[0.05, 0.20]) The definition above will create 3 classes. One class with values below `0.05`, one class with values from `0.05` to `0.20` and one class with values higher than `0.20`. So which method provides the optimal data classification for choropleth maps? As usual, it depends. There is another popular method that aids in determining class breaks. This method will maximize the similarity of values in a class while maximizing the distance between the classes (`natural breaks`). The method is also known as the Fisher-Jenks algorithm and is similar to k-Means in 1D: - By using the external Python package ``jenkspy`` we can derive these `optimum` breaks as such: ..
code:: python >>> from jenkspy import JenksNaturalBreaks >>> jnb = JenksNaturalBreaks(5) >>> jnb.fit(df_us_unemp['rate']) >>> jnb.inner_breaks_ [0.061, 0.088, 0.116, 0.161] And applied in our utility function: .. altair-plot:: classify(type='threshold', domain=[0.061, 0.088, 0.116, 0.161], title=['threshold Jenks','natural breaks']) Caveats: - For the type ``quantize`` and ``quantile`` scales we observe that the default number of classes is 5. You can change the number of classes using a ``SchemeParams()`` object. In the above specification we can change ``scheme='turbo'`` into ``scheme=alt.SchemeParams('turbo', count=2)`` to manually specify usage of 2 classes for the scheme within the scale. - The natural breaks method will determine the optimal class breaks given the required number of classes, but how many classes should you pick? One can investigate usage of goodness of variance fit (GVF), aka Jenks optimization method, to determine this. Repeat a Map ^^^^^^^^^^^^ The :class:`RepeatChart` pattern, accessible via the :meth:`Chart.repeat` method provides a convenient interface for a particular type of horizontal or vertical concatenation of a multi-dimensional dataset. In the following example we have a dataset referenced as ``source`` from which we use three columns defining the ``population``, ``engineers`` and ``hurricanes`` of each US state. The ``states`` is defined by making use of :func:`topo_feature` using ``url`` and ``feature`` as parameters. This is a convenience function for extracting features from a topojson url. These variables we provide as list in the ``.repeat()`` operator, which we refer to within the color encoding as ``alt.repeat('row')`` .. 
altair-plot:: import altair as alt from altair.datasets import data states = alt.topo_feature(data.us_10m.url, 'states') source = data.population_engineers_hurricanes.url variable_list = ['population', 'engineers', 'hurricanes'] alt.Chart(states).mark_geoshape(tooltip=True).encode( alt.Color(alt.repeat('row'), type='quantitative') ).transform_lookup( lookup='id', from_=alt.LookupData(source, 'id', variable_list) ).project( type='albersUsa' ).repeat( row=variable_list ).resolve_scale( color='independent' ) Facet a Map ^^^^^^^^^^^ The :class:`FacetChart` pattern, accessible via the :meth:`Chart.facet` method provides a convenient interface for a particular type of horizontal or vertical concatenation of a dataset where one field contains multiple ``variables``. Unfortunately, until https://github.com/vega/altair/issues/2369 is resolved, regular faceting will not work for geographic visualization: .. altair-plot:: source = data.population_engineers_hurricanes().melt(id_vars=['state', 'id']) us_states = data.us_10m(layer="states") gdf_comb = gpd.GeoDataFrame(source.join(us_states, on='id', rsuffix='_y')) alt.Chart(gdf_comb).mark_geoshape().encode( color=alt.Color('value:Q'), facet=alt.Facet('variable:N').columns(3) ).properties( width=180, height=130 ).resolve_scale('independent') For now, there are two possible workarounds. You can either pass the geographic data via a transform lookup instead of via :class:`Chart` as in the :ref:`gallery_us_incomebrackets_by_state_facet` gallery example. Or, you can manually filter the data in pandas, and create a small multiples chart via concatenation as in the following example: ..
altair-plot:: source = data.population_engineers_hurricanes().melt(id_vars=['state', 'id']) us_states = data.us_10m(layer="states") gdf_comb = gpd.GeoDataFrame(source.join(us_states, on='id', rsuffix='_y')) alt.concat( *( alt.Chart(gdf_comb[gdf_comb.variable == var], title=var) .mark_geoshape() .encode( color=alt.Color( "value:Q", legend=alt.Legend(orient="bottom", direction="horizontal") ) ) .project('albersUsa') .properties(width=180, height=130) for var in gdf_comb.variable.unique() ), columns=3 ).resolve_scale(color="independent") Interactions ^^^^^^^^^^^^ Often a map does not come alone, but is used in combination with another chart. Here we provide an example of an interactive visualization of a bar chart and a map. The data shows the states of the US in combination with a bar chart showing the 15 most populous states. Using an ``alt.selection_point()`` we define a selection parameter that connects to our left-mouseclick. .. altair-plot:: import altair as alt from altair.datasets import data # load the data us_states = data.us_10m(layer="states") us_population = data.population_engineers_hurricanes()[["state", "id", "population"]] # define a pointer selection click_state = alt.selection_point(fields=["state"]) # define a condition on the opacity encoding depending on the selection opacity = alt.when(click_state).then(alt.value(1)).otherwise(alt.value(0.2)) # create a choropleth map using a lookup transform choropleth = ( alt.Chart(us_states) .mark_geoshape() .transform_lookup( lookup="id", from_=alt.LookupData(us_population, "id", ["population", "state"]) ) .encode( color="population:Q", opacity=opacity, tooltip=["state:N", "population:Q"], ) .project(type="albersUsa") ) # create a bar chart with the same conditional ``opacity`` encoding. 
bars = ( alt.Chart( us_population.nlargest(15, "population"), title="Top 15 states by population" ) .mark_bar() .encode( x="population", opacity=opacity, color="population", y=alt.Y("state").sort("-x"), ) ) (choropleth & bars).add_params(click_state) The interaction is two-directional. If you click (shift-click for multi-selection) on a geometry or bar the selection receives an ``opacity`` of ``1`` and the remaining marks an ``opacity`` of ``0.2``. It is also possible to create charts with interval selections, as can be seen in the :ref:`gallery_interval_selection_map_quakes` gallery example. Expressions ^^^^^^^^^^^ Altair expressions can be used within a geographical visualization. The following example visualizes earthquakes on the globe using an ``orthographic`` projection, where we can rotate the earth on a single axis (``rotate0``). The utility function :func:`sphere` is used to get a disk of the earth as background. The GeoDataFrame with the earthquakes has an ``XYZ`` point geometry, where each coordinate represents ``lon``, ``lat`` and ``depth`` respectively. We use here an elegant way to access the nested point coordinates from the geometry column directly to draw circles. Using this approach we do not need to assign them to three separate columns first. ..
altair-plot:: import altair as alt from altair.datasets import data # load data gdf_quakies = data.earthquakes() gdf_world = data.world_110m(layer="countries") # define parameters range0 = alt.binding_range(min=-180, max=180, step=5, name='rotate longitude ') rotate0 = alt.param(value=120, bind=range0) hover = alt.selection_point(on="pointerover", clear="pointerout") # world disk sphere = alt.Chart(alt.sphere()).mark_geoshape( fill="aliceblue", stroke="black", strokeWidth=1.5 ) # countries as shapes world = alt.Chart(gdf_world).mark_geoshape( fill="mintcream", stroke="black", strokeWidth=0.35 ) # earthquakes as circles with fill for depth and size for magnitude # the hover param is added on the mar_circle only quakes = ( alt.Chart(gdf_quakies) .mark_circle(opacity=0.35, tooltip=True, stroke="black") .transform_calculate( lon="datum.geometry.coordinates[0]", lat="datum.geometry.coordinates[1]", depth="datum.geometry.coordinates[2]", ) .transform_filter( ((rotate0 * -1 - 90 < alt.datum.lon) & (alt.datum.lon < rotate0 * -1 + 90)).expr ) .encode( longitude="lon:Q", latitude="lat:Q", strokeWidth=alt.when(hover, empty=False).then(alt.value(1)).otherwise(alt.value(0)), size=alt.Size( "mag:Q", scale=alt.Scale(type="pow", range=[1, 1000], domain=[0, 6], exponent=4), ), fill=alt.Fill( "depth:Q", scale=alt.Scale(scheme="lightorange", domain=[0, 400]) ), ) .add_params(hover, rotate0) ) # define projection and add the rotation param for all layers comb = alt.layer(sphere, world, quakes).project( type="orthographic", rotate=alt.expr(f"[{rotate0.name}, 0, 0]") ) comb The earthquakes are displayed using a ``mark_geoshape`` and filtered once out of sight of the visible part of the world. A hover highlighting is added to get more insight of each earthquake. Tile-based Maps ^^^^^^^^^^^^^^^ To use tile-based maps (such as OpenStreetMap) as the background for ``mark_geoshape``, you can use the package `Altair Tiles `_ together with Altair. 
================================================ FILE: doc/user_guide/marks/image.rst ================================================ .. currentmodule:: altair .. _user-guide-image-marks: Image ~~~~~~ Image marks allow external images, such as icons or photographs, to be included in Altair visualizations. Image files such as PNG or JPG images are loaded from provided URLs. Image Mark Properties ^^^^^^^^^^^^^^^^^^^^^ An ``image`` mark can contain any :ref:`standard mark properties ` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: url aspect align baseline Scatter Plot with Image Marks ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame.from_records( [ { "x": 0.5, "y": 0.5, "img": "https://vega.github.io/vega-datasets/data/ffox.png", }, { "x": 1.5, "y": 1.5, "img": "https://vega.github.io/vega-datasets/data/gimp.png", }, { "x": 2.5, "y": 2.5, "img": "https://vega.github.io/vega-datasets/data/7zip.png", }, ] ) alt.Chart(source).mark_image(width=50, height=50).encode(x="x", y="y", url="img") Show Image Marks with Selection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This example demonstrates how to display image marks with drag selection. We create two charts: one with point marks and the other with image marks, applying the selection filter only to the latter. By combining these two charts, we can achieve the desired result. .. 
altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame.from_records( [{'a': 1, 'b': 1, 'image': 'https://altair-viz.github.io/_static/altair-logo-light.png'}, {'a': 2, 'b': 2, 'image': 'https://avatars.githubusercontent.com/u/11796929?s=200&v=4'}] ) brush = alt.selection_interval() point = alt.Chart(source).mark_circle(size=100).encode( x='a', y='b', ).add_params( brush ) img = alt.Chart(source).mark_image(width=50, height=75).encode( x='a', y='b', url='image' ).transform_filter( brush ) point + img In the layered chart, images may overlap one another. An alternative is to use a faceted image chart beside the original chart: .. altair-plot:: img_faceted = alt.Chart(source, width=50, height=75).mark_image().encode( url='image' ).facet( alt.Facet('image', title='', header=alt.Header(labelFontSize=0)) ).transform_filter( brush ) point | img_faceted If we want the images to not be visible in the initial chart we could add ``empty=False`` to the interval selection. However, Altair will not automatically resize the chart area to include the faceted chart when a selection is made, which means it seems like the selection has no effect. In order to resize the chart automatically, we need to explicitly set the ``autosize`` option in the ``configure`` method. .. altair-plot:: brush = alt.selection_interval(empty=False) point = alt.Chart(source).mark_circle(size=100).encode( x='a', y='b', ).add_params( brush ) img_faceted = alt.Chart(source, width=50, height=75).mark_image().encode( url='image' ).facet( alt.Facet('image', title='', header=alt.Header(labelFontSize=0)) ).transform_filter( brush ) (point | img_faceted).configure( autosize=alt.AutoSizeParams(resize=True) ) Use Local Images as Image Marks ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ We could also show local images by first converting them to base64-encoded_ strings.
In the example below, we load two images saved in the Altair repo; you can replace the image paths below with the location of the desired images on your machine. This approach also works with images stored as Numpy Arrays as can be seen in the tutorial :ref:`Displaying Numpy Images in Tooltips `. .. altair-plot:: import base64 import altair as alt import pandas as pd from io import BytesIO from PIL import Image image_paths = ["doc/_static/gray-square.png","doc/_static/altair-logo-light.png"] base64_images = [] for image_path in image_paths: pil_image = Image.open(image_path) output = BytesIO() pil_image.save(output, format='PNG') base64_images.append( "data:image/png;base64," + base64.b64encode(output.getvalue()).decode() ) source = pd.DataFrame({"x": [1, 2], "y": [1, 2], "image": base64_images}) alt.Chart(source).mark_image( width=50, height=50 ).encode( x='x', y='y', url='image' ) Image Tooltip ^^^^^^^^^^^^^ This example shows how to render images in tooltips. Either URLs or local file paths can be used to reference the images. To render the image, you must use the special column name "image" in your data and pass it as a list to the tooltip encoding. .. altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame.from_records( [{'a': 1, 'b': 1, 'image': 'https://altair-viz.github.io/_static/altair-logo-light.png'}, {'a': 2, 'b': 2, 'image': 'https://avatars.githubusercontent.com/u/11796929?s=200&v=4'}] ) alt.Chart(source).mark_circle(size=200).encode( x='a', y='b', tooltip=['image'] # Must be a list containing a field called "image" ) .. _base64-encoded: https://en.wikipedia.org/wiki/Binary-to-text_encoding ================================================ FILE: doc/user_guide/marks/index.rst ================================================ .. currentmodule:: altair .. _user-guide-marks: Marks ~~~~~ We saw in :ref:`user-guide-encoding` that the :meth:`~Chart.encode` method is used to map columns to visual attributes of the plot. 
The ``mark`` property is what specifies how exactly those attributes should be represented on the plot. Altair supports the following primitive mark types: ========================================= ========================================= ================================================================================ Mark Method Description ========================================= ========================================= ================================================================================ :ref:`user-guide-arc-marks` :meth:`~Chart.mark_arc` A pie chart. :ref:`user-guide-area-marks` :meth:`~Chart.mark_area` A filled area plot. :ref:`user-guide-bar-marks` :meth:`~Chart.mark_bar` A bar plot. :ref:`user-guide-circle-marks` :meth:`~Chart.mark_circle` A scatter plot with filled circles. :ref:`user-guide-geoshape-marks` :meth:`~Chart.mark_geoshape` Visualization containing spatial data :ref:`user-guide-image-marks` :meth:`~Chart.mark_image` A scatter plot with image markers. :ref:`user-guide-line-marks` :meth:`~Chart.mark_line` A line plot. :ref:`user-guide-point-marks` :meth:`~Chart.mark_point` A scatter plot with configurable point shapes. :ref:`user-guide-rect-marks` :meth:`~Chart.mark_rect` A filled rectangle, used for heatmaps :ref:`user-guide-rule-marks` :meth:`~Chart.mark_rule` A vertical or horizontal line spanning the axis. :ref:`user-guide-square-marks` :meth:`~Chart.mark_square` A scatter plot with filled squares. :ref:`user-guide-text-marks` :meth:`~Chart.mark_text` A scatter plot with points represented by text. :ref:`user-guide-tick-marks` :meth:`~Chart.mark_tick` A vertical or horizontal tick mark. :ref:`user-guide-trail-marks` :meth:`~Chart.mark_trail` A line with variable widths. 
========================================= ========================================= ================================================================================ In addition, Altair provides the following composite marks: ========================================= ============================== ================================ ================================== Mark Name Method Description Example ========================================= ============================== ================================ ================================== :ref:`user-guide-boxplot-marks` :meth:`~Chart.mark_boxplot` A box plot. :ref:`gallery_boxplot` :ref:`user-guide-errorband-marks` :meth:`~Chart.mark_errorband` A continuous band around a line. :ref:`gallery_line_with_ci` :ref:`user-guide-errorbar-marks` :meth:`~Chart.mark_errorbar` An error bar around a point. :ref:`gallery_errorbars_with_ci` ========================================= ============================== ================================ ================================== In Altair, marks can be most conveniently specified by the ``mark_*`` methods of the Chart object (e.g. ``mark_bar``), which take optional keyword arguments to configure the look of the marks. .. _mark-properties: Mark Properties _______________ This section lists standard mark properties for primitive mark types. Additionally, some marks may have special mark properties (listed in their documentation page). General Mark Properties ^^^^^^^^^^^^^^^^^^^^^^^ .. altair-object-table:: altair.MarkDef :properties: aria description style tooltip clip invalid order Position and Offset Properties ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. altair-object-table:: altair.MarkDef :properties: x x2 width height y y2 xOffset x2Offset yOffset y2Offset Color Properties ^^^^^^^^^^^^^^^^ .. altair-object-table:: altair.MarkDef :properties: filled color fill stroke blend opacity fillOpacity strokeOpacity Stroke Style Properties ^^^^^^^^^^^^^^^^^^^^^^^ .. 
altair-object-table:: altair.MarkDef :properties: strokeCap strokeDash strokeDashOffset strokeJoin strokeMiterLimit strokeWidth Hyperlink Properties ^^^^^^^^^^^^^^^^^^^^ Marks can act as hyperlinks when the ``href`` property or :ref:`channel ` is defined. When the ``href`` property is specified, the ``cursor`` mark property is set to ``"pointer"`` by default to serve as an affordance for hyperlinks. .. altair-object-table:: altair.MarkDef :properties: href cursor .. toctree:: :hidden: arc area bar boxplot circle errorband errorbar geoshape image line point rect rule square text tick trail ================================================ FILE: doc/user_guide/marks/line.rst ================================================ .. currentmodule:: altair .. _user-guide-line-marks: Line ~~~~ The ``line`` mark represents the data points stored in a field with a line connecting all of these points. Line marks are commonly used to depict trajectories or change over time. Unlike most other marks that represent one data element per mark, one line mark represents multiple data elements as a single line, akin to ``area`` and ``trail``. Note: For line segments that connect (x,y) positions to (x2,y2) positions, please use ``rule`` marks. For continuous lines with varying size, please use ``trail`` marks. Line Mark Properties -------------------- ..
altair-plot:: :hide-code: :div_class: properties-example import altair as alt import pandas as pd interpolate_select = alt.binding_select( options=[ "basis", "basis-open", "basis-closed", "bundle", "cardinal", "cardinal-open", "cardinal-closed", "catmull-rom", "linear", "linear-closed", "monotone", "natural", "step", "step-before", "step-after", ], name="interpolate", ) interpolate_var = alt.param(bind=interpolate_select, value="linear") tension_slider = alt.binding_range(min=0, max=1, step=0.05, name="tension") tension_var = alt.param(bind=tension_slider, value=0) strokeWidth_slider = alt.binding_range(min=0, max=10, step=0.5, name="strokeWidth") strokeWidth_var = alt.param(bind=strokeWidth_slider, value=2) strokeCap_select = alt.binding_select( options=["butt", "round", "square"], name="strokeCap", ) strokeCap_var = alt.param(bind=strokeCap_select, value="butt") strokeDash_select = alt.binding_select( options=[[1, 0], [8, 8], [8, 4], [4, 4], [4, 2], [2, 1], [1, 1]], name="strokeDash", ) strokeDash_var = alt.param(bind=strokeDash_select, value=[1, 0]) source = pd.DataFrame({"u": [1, 2, 3, 4, 5, 6], "v": [28, 55, 42, 34, 36, 38]}) alt.Chart(source).mark_line( interpolate=interpolate_var, tension=tension_var, strokeWidth=strokeWidth_var, strokeCap=strokeCap_var, strokeDash=strokeDash_var, ).encode(x="u", y="v").add_params( interpolate_var, tension_var, strokeWidth_var, strokeCap_var, strokeDash_var ) A ``line`` mark definition can contain any :ref:`standard mark properties <mark-properties>` and the following line interpolation and point overlay properties: .. altair-object-table:: altair.MarkDef :properties: orient interpolate tension point Examples -------- Line Chart ^^^^^^^^^^ Using a line mark with one temporal or ordinal field (typically on ``x``) and another quantitative field (typically on ``y``) produces a simple line chart with a single line. ..
altair-plot:: import altair as alt from altair import datum from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line().encode( x="date", y="price", ).transform_filter(datum.symbol == "GOOG") We can create multiple lines by grouping along different attributes, such as ``color`` or ``detail``. Multi-series Colored Line Chart ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Adding a field to a mark property channel such as ``color`` groups data points into different series, producing a multi-series colored line chart. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line().encode( x="date", y="price", color="symbol", ) We can further apply a selection to highlight a certain line on hover. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() highlight = alt.selection_point( on="pointerover", fields=["symbol"], nearest=True ) base = alt.Chart(source).encode( x="date:T", y="price:Q", color="symbol:N" ) points = base.mark_circle().encode( opacity=alt.value(0) ).add_params( highlight ).properties( width=600 ) lines = base.mark_line().encode( size=alt.when(~highlight).then(alt.value(1)).otherwise(alt.value(3)) ) points + lines Multi-series Line Chart with Varying Dashes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Adding a field to ``strokeDash`` also produces a multi-series line chart. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line().encode( x="date", y="price", strokeDash="symbol", ) We can also use line grouping to create a line chart that has multiple parts with varying styles. ..
altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame({ "a": ["A", "B", "D", "E", "E", "G", "H"], "b": [28, 55, 91, 81, 81, 19, 87], "predicted": [False, False, False, False, True, True, True] }) alt.Chart(source).mark_line().encode( x="a:O", y="b:Q", strokeDash="predicted:N" ) Multi-series Line Chart with the Detail Channel ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To group lines by a field without mapping the field to any visual properties, we can map the field to the ``detail`` channel to create a multi-series line chart with the same color. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line().encode( x="date", y="price", detail="symbol", ) The same method can be used to group lines for a ranged dot plot. .. altair-plot:: import altair as alt from altair.datasets import data source = data.countries() base = alt.Chart(source).encode( alt.X("life_expect:Q") .scale(zero=False) .title("Life Expectancy (years)"), alt.Y("country:N") .axis(offset=5, ticks=False, minExtent=70, domain=False) .title("Country") ).transform_filter( alt.FieldOneOfPredicate(field="country", oneOf=["China", "India", "United States", "Indonesia", "Brazil"]) ) line = base.mark_line().encode( detail="country", color=alt.value("#db646f") ).transform_filter( alt.FieldOneOfPredicate(field="year", oneOf=[1995, 2000]) ) point = base.mark_point(filled=True).encode( alt.Color("year").scale(range=["#e6959c", "#911a24"], domain=[1995, 2000]), size=alt.value(100), opacity=alt.value(1), ) line + point Line Chart with Point Markers ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ By setting the ``point`` property of the mark definition to ``True`` or an object defining a property of the overlaying point marks, we can overlay point markers on top of a line. .. 
altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line(point=True).encode( x="year(date)", y="mean(price):Q", color="symbol:N" ) This is equivalent to adding another layer of filled point marks. Note that the overlay point marks have ``opacity`` = 1 by default (instead of semi-transparent like normal point marks). Here we create stroked points by setting ``filled`` to ``False`` and ``fill`` to ``"white"``. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line( point=alt.OverlayMarkDef(filled=False, fill="white") ).encode( x="year(date)", y="mean(price):Q", color="symbol:N" ) Custom Ordering ^^^^^^^^^^^^^^^ By default, the line's path (order of points in the line) is determined by data values on the temporal/ordinal field. However, a field can be mapped to the ``order`` channel for determining a custom path. For example, to show a pattern of data change over time between gasoline price and miles driven per capita, we use the ``order`` channel to sort the points in the line by the year field. In this example, we also use the ``point`` property to overlay point marks over the line marks to highlight each data point. Now, the earliest datapoint (1956) is one endpoint of the line, and the latest datapoint (2010) is the other endpoint of the line. .. altair-plot:: import altair as alt from altair.datasets import data source = data.driving() alt.Chart(source).mark_line(point=True).encode( alt.X("miles").scale(zero=False), alt.Y("gas").scale(zero=False), order="year", tooltip=["miles", "gas", "year"], ) Line Interpolation ^^^^^^^^^^^^^^^^^^ The ``interpolate`` property of a mark definition can be used to change the line interpolation method. For example, we can set ``interpolate`` to ``"monotone"``. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line(interpolate="monotone").encode( x="date", y="price", ).transform_filter( alt.datum.symbol == "GOOG" ) We can also set ``interpolate`` to ``"step-after"`` to create a step-chart. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line(interpolate="step-after").encode( x="date", y="price" ).transform_filter( alt.datum.symbol == "GOOG" ) Geo Line ^^^^^^^^ By mapping geographic coordinate data to ``longitude`` and ``latitude`` channels of a corresponding projection, we can draw lines through geographic points. .. altair-plot:: import altair as alt from altair.datasets import data import pandas as pd airports = data.airports.url flights_airport = data.flights_airport.url states = alt.topo_feature(data.us_10m.url, feature="states") lookup_data = alt.LookupData( airports, key="iata", fields=["state", "latitude", "longitude"] ) source = pd.DataFrame({ "airport": ["SEA", "SFO", "LAX", "LAS", "DFW", "DEN", "ORD", "JFK"], "order": [1, 2, 3, 4, 5, 6, 7, 8], }) background = alt.Chart(states).mark_geoshape( fill="lightgray", stroke="white" ).properties( width=750, height=500, ).project("albersUsa") line = alt.Chart(source).mark_line().encode( latitude="latitude:Q", longitude="longitude:Q", order="order" ).transform_lookup( lookup="airport", from_=lookup_data ) background + line ================================================ FILE: doc/user_guide/marks/point.rst ================================================ .. currentmodule:: altair .. _user-guide-point-marks: Point ~~~~~ ``point`` mark represents each data point with a symbol. Point marks are commonly used in visualizations like scatter plots. Point Mark Properties --------------------- .. 
altair-plot:: :hide-code: :div_class: properties-example import altair as alt from altair.datasets import data source = data.cars() shape_select = alt.binding_select( options=[ "arrow", "circle", "square", "cross", "diamond", "triangle", "triangle-up", "triangle-down", "triangle-right", "triangle-left", "wedge", "stroke", "M-1,-1H1V1H-1Z", "M0,.5L.6,.8L.5,.1L1,-.3L.3,-.4L0,-1L-.3,-.4L-1,-.3L-.5,.1L-.6,.8L0,.5Z", ], name="shape", ) shape_var = alt.param(bind=shape_select, value="circle") angle_slider = alt.binding_range(min=-360, max=360, step=1, name="angle") angle_var = alt.param(bind=angle_slider, value=0) size_slider = alt.binding_range(min=0, max=500, step=10, name="size") size_var = alt.param(bind=size_slider, value=50) strokeWidth_slider = alt.binding_range(min=0, max=10, step=0.5, name="strokeWidth") strokeWidth_var = alt.param(bind=strokeWidth_slider, value=2) alt.Chart(source).mark_point( shape=shape_var, angle=angle_var, size=size_var, strokeWidth=strokeWidth_var, ).encode(x="Horsepower:Q", y="Miles_per_Gallon:Q").add_params( shape_var, angle_var, size_var, strokeWidth_var ) A ``point`` mark definition can contain any :ref:`standard mark properties <mark-properties>` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: shape size Examples -------- Dot Plot ^^^^^^^^ Mapping a field to either only ``x`` or only ``y`` of point marks creates a dot plot. .. altair-plot:: import altair as alt from altair.datasets import data source = data.movies() alt.Chart(source).mark_point().encode( x="IMDB Rating:Q" ) Scatter Plot ^^^^^^^^^^^^ Mapping fields to both the ``x`` and ``y`` channels creates a scatter plot. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_point().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", ) By default, ``point`` marks only have borders and are transparent inside. You can create a filled point by setting ``filled`` to ``True``. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_point(filled=True).encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", ) Bubble Plot ^^^^^^^^^^^ By mapping a third field to the ``size`` channel in the scatter plot, we can create a bubble plot instead. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_point().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", size="Acceleration:Q", ) Scatter Plot with Color and/or Shape ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Fields can also be encoded in the scatter plot using the ``color`` or ``shape`` channels. For example, this specification encodes the field ``Origin`` with both ``color`` and ``shape``. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_point().encode( alt.X("Miles_per_Gallon:Q").scale(zero=False), alt.Y("Horsepower:Q").scale(zero=False), color="Origin:N", shape="Origin:N", ) Dot Plot with Jittering ^^^^^^^^^^^^^^^^^^^^^^^ To jitter points on a discrete scale, you can add a random offset: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_point().encode( x="Horsepower:Q", y="Cylinders:O", yOffset="random:Q", ).transform_calculate( random="random()" ).properties( height=alt.Step(50) ) Wind Vector Example ^^^^^^^^^^^^^^^^^^^ We can also use a point mark with ``wedge`` as the ``shape`` and an ``angle`` encoding to create a wind vector map. Other shape options are: ``"circle"``, ``"square"``, ``"cross"``, ``"diamond"``, ``"triangle-up"``, ``"triangle-down"``, ``"triangle-right"``, ``"triangle-left"``, ``"stroke"``, ``"arrow"``, and ``"triangle"``. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.windvectors() alt.Chart(source).mark_point(shape="wedge", filled=True).encode( latitude="latitude", longitude="longitude", color=alt.Color("dir").scale(domain=[0, 360], scheme="rainbow").legend(None), angle=alt.Angle("dir").scale(domain=[0, 360], range=[180, 540]), size=alt.Size("speed").scale(rangeMax=500), ).project("equalEarth") Geo Point ^^^^^^^^^ By mapping geographic coordinate data to ``longitude`` and ``latitude`` channels of a corresponding projection, we can visualize geographic points. The example below shows major airports in the US. .. altair-plot:: import altair as alt from altair.datasets import data airports = data.airports() states = alt.topo_feature(data.us_10m.url, feature="states") # US states background background = alt.Chart(states).mark_geoshape( fill="lightgray", stroke="white" ).properties( width=500, height=300, ).project("albersUsa") # airport positions on background points = alt.Chart(airports).mark_circle( size=10, color="steelblue", ).encode( longitude="longitude:Q", latitude="latitude:Q", tooltip=["name", "city", "state"], ) background + points ================================================ FILE: doc/user_guide/marks/rect.rst ================================================ .. currentmodule:: altair .. _user-guide-rect-marks: Rect ~~~~ The ``rect`` mark represents an arbitrary rectangle. Rect Mark Properties -------------------- .. 
altair-plot:: :hide-code: :div_class: properties-example import altair as alt x_slider = alt.binding_range(min=1, max=100, step=1, name="x") x_var = alt.param(bind=x_slider, value=25) x2_slider = alt.binding_range(min=1, max=100, step=1, name="x2") x2_var = alt.param(bind=x2_slider, value=75) y_slider = alt.binding_range(min=1, max=100, step=1, name="y") y_var = alt.param(bind=y_slider, value=25) y2_slider = alt.binding_range(min=1, max=100, step=1, name="y2") y2_var = alt.param(bind=y2_slider, value=75) cornerRadius_slider = alt.binding_range(min=0, max=50, step=1) cornerRadius_var = alt.param(bind=cornerRadius_slider, value=0, name="cornerRadius") alt.Chart().mark_rect(cornerRadius=cornerRadius_var, color="orange").encode( x=alt.XDatum(x_var, type="quantitative", scale=alt.Scale(domain=[0, 100])), x2=alt.X2Datum(x2_var), y=alt.XDatum(y_var, type="quantitative", scale=alt.Scale(domain=[0, 100])), y2=alt.X2Datum(y2_var), ).add_params(x_var, x2_var, y_var, y2_var, cornerRadius_var) A ``rect`` mark definition can contain any :ref:`standard mark properties <mark-properties>` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: width height align baseline cornerRadius Examples -------- Heatmap ^^^^^^^ Using the ``rect`` marks with discrete fields on ``x`` and ``y`` channels creates a heatmap. .. altair-plot:: import altair as alt from altair.datasets import data source = data.seattle_weather() alt.Chart(source).mark_rect().encode( alt.X("date(date):O").axis(labelAngle=0, format="%e").title("Day"), alt.Y("month(date):O").title("Month"), alt.Color("max(temp_max):Q").title("Max Temp"), ) Ranged Rectangles ^^^^^^^^^^^^^^^^^ Specifying both ``x`` and ``x2`` and/or ``y`` and ``y2`` creates a rectangle that spans over certain x and/or y values. For example, we can use ``rect`` to create an annotation ``layer`` that provides a shading between global ``min`` and ``max`` values. ..
altair-plot:: import altair as alt from altair.datasets import data source = data.cars() point = alt.Chart(source).mark_point().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", ) rect = alt.Chart(source).mark_rect().encode( y="max(Miles_per_Gallon)", y2="min(Miles_per_Gallon)", opacity=alt.value(0.2), ) point + rect ================================================ FILE: doc/user_guide/marks/rule.rst ================================================ .. currentmodule:: altair .. _user-guide-rule-marks: Rule ~~~~ The ``rule`` mark represents each data point as a line segment. It can be used in two ways. First, as a line segment that spans the complete width or height of a view. Second, a rule can be used to draw a line segment between two positions. Rule Mark Properties -------------------- .. altair-plot:: :hide-code: :div_class: properties-example import altair as alt import pandas as pd x_slider = alt.binding_range(min=1, max=100, step=1) x_var = alt.param(bind=x_slider, value=35, name="x") x2_slider = alt.binding_range(min=1, max=100, step=1) x2_var = alt.param(bind=x2_slider, value=75, name="x2") y_slider = alt.binding_range(min=1, max=100, step=1) y_var = alt.param(bind=y_slider, value=25, name="y") y2_slider = alt.binding_range(min=1, max=100, step=1) y2_var = alt.param(bind=y2_slider, value=75, name="y2") strokeWidth_slider = alt.binding_range(min=0, max=10, step=0.5) strokeWidth_var = alt.param(bind=strokeWidth_slider, value=2, name="strokeWidth") strokeCap_select = alt.binding_select(options=["butt", "round", "square"]) strokeCap_var = alt.param(bind=strokeCap_select, value="butt", name="strokeCap") strokeDash_select = alt.binding_select( options=[[1, 0], [8, 8], [8, 4], [4, 4], [4, 2], [2, 1], [1, 1]] ) strokeDash_var = alt.param(bind=strokeDash_select, value=[1, 0], name="strokeDash") alt.Chart().mark_rule( color="orange", strokeWidth=strokeWidth_var, strokeCap=strokeCap_var, strokeDash=strokeDash_var, ).encode( x=alt.datum(x_var, type="quantitative", 
scale=alt.Scale(domain=[0, 100])), y=alt.datum(y_var, type="quantitative", scale=alt.Scale(domain=[0, 100])), x2=alt.datum(x2_var), y2=alt.datum(y2_var), ).add_params( x_var, x2_var, y_var, y2_var, strokeWidth_var, strokeCap_var, strokeDash_var, ) A ``rule`` mark definition can contain any :ref:`standard mark properties <mark-properties>`. Examples -------- Width/Height-Spanning Rules ^^^^^^^^^^^^^^^^^^^^^^^^^^^ If the ``rule`` mark only has ``y`` encoding, the output view produces horizontal rules that span the complete width. Similarly, if the ``rule`` mark only has ``x`` encoding, the output view produces vertical rules that span the height. We can use rules to show the average price of different stocks akin to ``tick`` marks. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_rule().encode( y="mean(price):Q", size=alt.value(2), color="symbol:N" ) The fact that rule marks span the width or the height of a single view makes them useful as an annotation layer. For example, we can use rules to show average values of different stocks alongside the price curve. .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() base = alt.Chart(source).properties(width=550) line = base.mark_line().encode( x="date", y="price", color="symbol" ) rule = base.mark_rule().encode( y="average(price)", color="symbol", size=alt.value(2) ) line + rule We can also use a rule mark to show global mean value over a histogram. .. altair-plot:: import altair as alt from altair.datasets import data source = data.movies.url base = alt.Chart(source) bar = base.mark_bar().encode( x=alt.X("IMDB Rating:Q").bin().axis(None), y="count()" ) rule = base.mark_rule(color="red").encode( x="mean(IMDB Rating):Q", size=alt.value(5), ) bar + rule Ranged Rules ^^^^^^^^^^^^ To control the spans of horizontal/vertical rules, ``x`` and ``x2``/ ``y`` and ``y2`` channels can be specified.
For example, we can use ``y`` and ``y2`` to show the ``"min"`` and ``"max"`` values of horsepower for cars from different locations. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_rule().encode( x="Origin", y="min(Horsepower)", y2="max(Horsepower)", ) Diagonal Line ^^^^^^^^^^^^^ By using ``alt.value`` with the special ``'width'`` and ``'height'`` signals, you can draw a diagonal line that spans the full extent of the chart from corner to corner regardless of the extent of the x and y domains (i.e. the line is drawn in "pixel space"). .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() points = alt.Chart(source).mark_circle(size=60).encode( x=alt.X("Horsepower:Q"), y=alt.Y("Miles_per_Gallon:Q"), ) diagonal = alt.Chart().mark_rule(strokeDash=[4, 4]).encode( x=alt.value(0), y=alt.value("height"), x2=alt.value("width"), y2=alt.value(0), ) (points + diagonal).interactive() Identity Line (x=y) ^^^^^^^^^^^^^^^^^^^^ When comparing two related variables (e.g. predicted vs actual values), drawing a line representing a perfect relationship (i.e. an "identity line" at x=y) can facilitate comparisons. In contrast to the diagonal line above that was drawn in pixel space, we now need to draw the line in data space. Therefore we use ``alt.datum`` instead of ``alt.value``. If our chart is static, we can manually enter the x and y coordinates to draw the identity line. But, if we want to draw an identity line that is robust to zooming and panning, we instead need to compute the line dynamically using the ``domain`` expression which redraws the line each time the range of the x (or y) domain changes. ..
altair-plot:: import altair as alt import pandas as pd import numpy as np rng = np.random.RandomState(42) actual = rng.uniform(10, 90, 50) predicted = actual + rng.normal(0, 12, 50) source = pd.DataFrame({"actual": actual, "predicted": predicted}) points = alt.Chart(source).mark_circle(size=60).encode( x=alt.X("actual:Q"), y=alt.Y("predicted:Q"), ) identity_line = alt.Chart().mark_rule(strokeDash=[4, 4]).encode( x=alt.datum(alt.expr("domain('x')[0]"), type="quantitative"), y=alt.datum(alt.expr("domain('x')[0]"), type="quantitative"), x2=alt.datum(alt.expr("domain('x')[1]")), # inherits "quantitative" from x y2=alt.datum(alt.expr("domain('x')[1]")), # inherits "quantitative" from y ) # either the x or y domain could be used to compute the line coordinates (points + identity_line).interactive() ================================================ FILE: doc/user_guide/marks/square.rst ================================================ .. currentmodule:: altair .. _user-guide-square-marks: Square ~~~~~~ The ``square`` mark is similar to the ``point`` mark, except that (1) the ``shape`` value is always set to ``square`` and (2) it is filled by default. Square Mark Properties ---------------------- A ``square`` mark definition can contain any :ref:`standard mark properties <mark-properties>` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: size Scatter Plot with Square ------------------------ .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_square().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", ) ================================================ FILE: doc/user_guide/marks/text.rst ================================================ .. currentmodule:: altair .. _user-guide-text-marks: Text ~~~~~~ The ``text`` mark represents each data point with text instead of a point. Text Mark Properties -------------------- ..
altair-plot:: :hide-code: :div_class: properties-example import altair as alt import pandas as pd angle_slider = alt.binding_range(min=-180, max=180, step=1) angle_var = alt.param(bind=angle_slider, value=0, name="angle") dx_slider = alt.binding_range(min=-20, max=20, step=1) dx_var = alt.param(bind=dx_slider, value=5, name="dx") dy_slider = alt.binding_range(min=-20, max=20, step=1) dy_var = alt.param(bind=dy_slider, value=0, name="dy") xOffset_slider = alt.binding_range(min=-20, max=20, step=1) xOffset_var = alt.param(bind=xOffset_slider, value=0, name="xOffset") yOffset_slider = alt.binding_range(min=-20, max=20, step=1) yOffset_var = alt.param(bind=yOffset_slider, value=0, name="yOffset") fontSize_slider = alt.binding_range(min=1, max=36, step=1) fontSize_var = alt.param(bind=fontSize_slider, value=14, name="fontSize") limit_slider = alt.binding_range(min=0, max=150, step=1) limit_var = alt.param(bind=limit_slider, value=0, name="limit") align_select = alt.binding_select(options=["left", "center", "right"]) align_var = alt.param(bind=align_select, value="left", name="align") baseline_select = alt.binding_select(options=["alphabetic", "top", "middle", "bottom"]) baseline_var = alt.param(bind=baseline_select, value="middle", name="baseline") font_select = alt.binding_select(options=["sans-serif", "serif", "monospace"]) font_var = alt.param(bind=font_select, value="sans-serif", name="font") fontWeight_select = alt.binding_select(options=["normal", "bold"]) fontWeight_var = alt.param(bind=fontWeight_select, value="normal", name="fontWeight") fontStyle_select = alt.binding_select(options=["normal", "italic"]) fontStyle_var = alt.param(bind=fontStyle_select, value="normal", name="fontStyle") source = pd.DataFrame( { "a": [30, 25, 70], "b": [28, 65, 43], "label": ["Andy", "Brian", "Charlie"], } ) base = alt.Chart(source).encode( x=alt.X("a:Q").axis(labelAngle=0).scale(domain=[0, 100]), y=alt.Y("b:Q").scale(domain=[0, 100]), ) pts = base.mark_point() text = 
base.mark_text( dx=dx_var, dy=dy_var, xOffset=xOffset_var, yOffset=yOffset_var, angle=angle_var, align=align_var, baseline=baseline_var, font=font_var, fontSize=fontSize_var, fontStyle=fontStyle_var, fontWeight=fontWeight_var, limit=limit_var, ).encode(text="label:N") (pts + text).add_params( dx_var, dy_var, xOffset_var, yOffset_var, angle_var, align_var, baseline_var, font_var, fontSize_var, fontStyle_var, fontWeight_var, limit_var, ) A ``text`` mark definition can contain any :ref:`standard mark properties <mark-properties>` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: angle align baseline dir dx dy ellipsis font fontSize fontStyle fontWeight limit lineHeight radius text theta Examples -------- Text Table Heatmap ^^^^^^^^^^^^^^^^^^ .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() base = alt.Chart(source).transform_aggregate( num_cars="count()", groupby=["Origin", "Cylinders"], ).encode( alt.X("Cylinders:O").scale(paddingInner=0), alt.Y("Origin:O").scale(paddingInner=0), ) heatmap = base.mark_rect().encode( alt.Color("num_cars:Q") .scale(scheme="viridis") .legend(direction="horizontal") ) predicate = alt.datum.num_cars > 100 text = base.mark_text(baseline="middle").encode( text="num_cars:Q", color=alt.when(predicate).then(alt.value("black")).otherwise(alt.value("white")), ) heatmap + text Labels ^^^^^^ You can also use ``text`` marks as labels for other marks and set offset (``dx`` or ``dy``), ``align``, and ``baseline`` properties of the mark definition. ..
altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame({ "a": ["A", "B", "C"], "b": [28, 55, 43] }) bar = alt.Chart(source).mark_bar().encode( y="a:N", x=alt.X("b:Q").scale(domain=[0, 60]) ) text = bar.mark_text( align="left", baseline="middle", dx=3 ).encode(text="b") bar + text Labels Position Based on Condition ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ By default, text marks used as labels in Altair are positioned above or to the right of the value. However, when dealing with negative values, this default positioning can lead to label overlap with the bar. To address this issue, you can set label positions via :ref:`expressions`. Here's an example demonstrating how to do this: .. altair-plot:: import altair as alt import pandas as pd source = pd.DataFrame({ "a": ["A", "B", "C"], "b": [28, -5, 10] }) bar = alt.Chart(source).mark_bar().encode( y="a:N", x=alt.X("b:Q").scale(domain=[-10, 35]) ) text_conditioned = bar.mark_text( align="left", baseline="middle", dx=alt.expr(alt.expr.if_(alt.datum.b >= 0, 10, -20)) ).encode(text="b") bar + text_conditioned Scatter Plot with Text ^^^^^^^^^^^^^^^^^^^^^^ Mapping a field to ``text`` channel of text mark sets the mark's text value. For example, we can make a colored scatter plot with text marks showing the initial character of its origin, instead of ``point`` marks. .. altair-plot:: import altair as alt from altair.datasets import data from altair import datum source = data.cars() alt.Chart(source).mark_text().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", color="Origin:N", text="Origin[0]:N", ) Geo Text ^^^^^^^^ By mapping geographic coordinate data to ``longitude`` and ``latitude`` channels of a corresponding projection, we can show text at accurate locations. The example below shows the name of every US state capital at the location of the capital. ..
altair-plot:: import altair as alt from altair.datasets import data states = alt.topo_feature(data.us_10m.url, feature="states") source = data.us_state_capitals() background = alt.Chart(states).mark_geoshape( fill="lightgray", stroke="white", ).properties( width=750, height=500, ).project("albersUsa") line = alt.Chart(source).mark_text(dy=-10).encode( latitude="lat:Q", longitude="lon:Q", text="city:N" ) point = alt.Chart(source).mark_circle().encode( latitude="lat:Q", longitude="lon:Q", color=alt.value("orange"), ) background + line + point ================================================ FILE: doc/user_guide/marks/tick.rst ================================================ .. currentmodule:: altair .. _user-guide-tick-marks: Tick ~~~~ The ``tick`` mark represents each data point as a short line. This is a useful mark for displaying the distribution of values in a field. Tick Mark Properties -------------------- A ``tick`` mark definition can contain any :ref:`standard mark properties ` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: cornerRadius orient Examples -------- Dot Plot ^^^^^^^^ The following dot plot uses tick marks to show the distribution of precipitation in Seattle. .. altair-plot:: import altair as alt from altair.datasets import data source = data.seattle_weather() alt.Chart(source).mark_tick().encode( x="precipitation:Q" ) Strip Plot ^^^^^^^^^^ By adding a ``y`` field, a strip plot can be created that shows the distribution of horsepower by number of cylinders. .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_tick().encode( x="Horsepower:Q", y="Cylinders:O", ) Customizing Tick’s Size and Thickness ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
altair-plot:: import altair as alt from altair.datasets import data source = data.seattle_weather() alt.Chart(source).mark_tick().encode( x="precipitation:Q" ).configure_tick( thickness=2, bandSize=10, ) ================================================ FILE: doc/user_guide/marks/trail.rst ================================================ .. currentmodule:: altair .. _user-guide-trail-marks: Trail ~~~~~ The ``trail`` mark represents the data points stored in a field with a line connecting all of these points. Trail is similar to the ``line`` mark but a trail can have variable widths determined by backing data. Unlike lines, trails do not support different interpolation methods and use ``fill`` (not ``stroke``) for their color. Trail marks are useful if you want to draw lines with changing size to reflect the underlying data. Trail Mark Properties --------------------- A ``trail`` mark definition can contain any :ref:`standard mark properties ` and the following special properties: .. altair-object-table:: altair.MarkDef :properties: orient Examples -------- Line Chart with Varying Size ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_trail().encode( x="date", y="price", color="symbol", size="price", ) Comet Chart Showing Changes Between Two States ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
altair-plot:: import altair as alt from altair.datasets import data alt.Chart(data.barley.url).transform_pivot( "year", value="yield", groupby=["variety", "site"] ).transform_fold( ["1931", "1932"], as_=["year", "yield"] ).transform_calculate( calculate="datum['1932'] - datum['1931']", as_="delta" ).mark_trail().encode( alt.X("year:O").title(None), alt.Y("variety:N").title("Variety"), alt.Size("yield:Q") .scale(range=[0, 12]) .legend(values=[20, 60]) .title("Barley Yield (bushels/acre)"), alt.Color("delta:Q") .scale(domainMid=0) .title("Yield Delta (%)"), alt.Tooltip(["year:O", "yield:Q"]), alt.Column("site:N").title("Site"), ).configure_legend( orient='bottom', direction='horizontal' ).configure_view( stroke=None ).properties( title="Barley Yield comparison between 1932 and 1931" ) ================================================ FILE: doc/user_guide/saving_charts.rst ================================================ .. currentmodule:: altair .. _user-guide-saving: Saving Altair Charts -------------------- Altair chart objects have a :meth:`Chart.save` method which allows charts to be saved in a variety of formats. .. saving-json: JSON format ~~~~~~~~~~~ The fundamental chart representation output by Altair is a JSON string format; one of the core methods provided by Altair is :meth:`Chart.to_json`, which returns a JSON string that represents the chart content. Additionally, you can save a chart to a JSON file using :meth:`Chart.save`, by passing a filename with a ``.json`` extension. For example, here we save a simple scatter-plot to JSON: .. code-block:: python import altair as alt from altair.datasets import data chart = alt.Chart(data.cars.url).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) chart.save('chart.json') The contents of the resulting file will look something like this: .. 
code-block:: json { "$schema": "https://vega.github.io/schema/vega-lite/v6.json", "config": { "view": { "continuousHeight": 300, "continuousWidth": 300 } }, "data": { "url": "https://vega.github.io/vega-datasets/data/cars.json" }, "encoding": { "color": { "field": "Origin", "type": "nominal" }, "x": { "field": "Horsepower", "type": "quantitative" }, "y": { "field": "Miles_per_Gallon", "type": "quantitative" } }, "mark": {"type": "point"} } This JSON can then be inserted into any web page using the vegaEmbed_ library. .. saving-html: HTML format ~~~~~~~~~~~ If you wish for Altair to take care of the HTML embedding for you, you can save a chart directly to an HTML file using .. code-block:: python chart.save('chart.html') This will create a simple HTML template page that loads Vega, Vega-Lite, and vegaEmbed, such that when opened in a browser the chart will be rendered. For example, saving the above scatter-plot to HTML creates a file with the following contents, which can be opened and rendered in any modern javascript-enabled web browser: .. code-block:: HTML
You can view the result here: `chart.html `_. By default, ``canvas`` is used for rendering the visualization in vegaEmbed. To change to ``svg`` rendering, use the ``embed_options`` as such: .. code-block:: python chart.save('chart.html', embed_options={'renderer':'svg'}) If you need an HTML string object for further processing in custom HTML reports, you can use the :meth:`Chart.to_html` method: .. code-block:: python html_string = chart.to_html() # Use html_string in your custom HTML generation The :meth:`Chart.to_html` method returns a string containing the HTML representation of the chart, which can be embedded into larger HTML documents or processed programmatically. .. note:: This is not the same as ``alt.renderers.enable('svg')``, what renders the chart as a static ``svg`` image within a Jupyter notebook. Offline HTML support ^^^^^^^^^^^^^^^^^^^^ By default, an HTML file generated by ``chart.save('chart.html')`` loads the necessary JavaScript dependencies from an online CDN location. This results in a small HTML file, but it means that an active internet connection is required in order to display the chart. As an alternative, the ``inline=True`` keyword argument may be provided to ``chart.save`` to generate an HTML file that includes all necessary JavaScript dependencies inline. This results in a larger file size, but HTML files generated this way do not require an active internet connection to display. .. code-block:: python chart.save('chart.html', inline=True) .. note:: Calling ``chart.save`` with ``inline=True`` requires :ref:`additional-dependencies`. .. _saving-png: PNG, SVG, and PDF format ~~~~~~~~~~~~~~~~~~~~~~~~ To save an Altair chart object as a PNG, SVG, or PDF image, you can use .. code-block:: python chart.save('chart.png') chart.save('chart.svg') chart.save('chart.pdf') .. 
note:: :ref:`additional-dependencies` are required to save charts as images by running the javascript code necessary to interpret the Vega-Lite specification and output it in the form of an image. altair_saver ^^^^^^^^^^^^ .. note:: altair_saver was used in Altair 4 and earlier versions. It is no longer maintained and got superseded by vl-convert_ which provides a superior user experience and performance. PNG Figure Size/Resolution ^^^^^^^^^^^^^^^^^^^^^^^^^^ When using ``chart.save()`` to create a PNG image, the resolution of the resulting image defaults to 72 pixels per inch (ppi). To change the resolution of the image, while maintaining the same physical size, the ``ppi`` argument may be provided to ``chart.save``. For example, to save the image with a resolution of 200 pixels-per-inch:: chart.save('chart.png', ppi=200) To change the physical size of the resulting image while preserving the resolution, the ``scale_factor`` argument may be used. For example, to save the image at double the default size at the default resolution of 72 ppi:: chart.save('chart.png', scale_factor=2) .. _additional-dependencies: Additional Dependencies ~~~~~~~~~~~~~~~~~~~~~~~ Saving charts to images or offline HTML files requires the vl-convert_ package:: conda install -c conda-forge vl-convert-python or:: pip install vl-convert-python vl-convert_ does not require any external dependencies. See the vl-convert documentation for information and for known `limitations `_. Sharable URL ~~~~~~~~~~~~ The :meth:`Chart.to_url` method can be used to build a sharable URL that opens the chart specification in the online Vega editor_. .. altair-plot:: :output: repr import altair as alt from altair.datasets import data chart = alt.Chart(data.cars.url).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ) chart.to_url() .. _vl-convert: https://github.com/vega/vl-convert .. _vegaEmbed: https://github.com/vega/vega-embed .. 
_editor: https://vega.github.io/editor/ ================================================ FILE: doc/user_guide/scale_resolve.rst ================================================ .. currentmodule:: altair .. _user-guide-resolve: Scale & Guide Resolution ------------------------ When creating compound charts (see :ref:`user-guide-compound`), altair defaults to using shared chart scales and guides (e.g. axes, legends, etc.). This default can be adjusted using the :meth:`Chart.resolve_scale`, :meth:`Chart.resolve_axis`, and :meth:`Chart.resolve_legend` functions. For example, suppose you would like to concatenate two charts with separate color scales; the default behavior is for the color scale to be created for a union of the two color encoding domains: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars() base = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q' ).properties( width=200, height=200 ) alt.concat( base.encode(color='Origin:N'), base.encode(color='Cylinders:O') ) This default can be changed by setting the scale resolution for the color to ``"independent"`` (rather than the default, ``"shared"``): .. altair-plot:: alt.concat( base.encode(color='Origin:N'), base.encode(color='Cylinders:O') ).resolve_scale( color='independent' ) Dual Y Axis ~~~~~~~~~~~ A common technique for combining chart containing different measures is using a dual y axis. There are two strategies to achieve this result using altair. The first is to manually specify the mark color and associated axis title color of each layer. .. 
altair-plot:: import altair as alt from altair.datasets import data source = data.cars() base = alt.Chart(source).encode(x='year(Year):T') line_A = base.mark_line(color='#5276A7').encode( alt.Y('average(Horsepower):Q').axis(titleColor='#5276A7') ) line_B = base.mark_line(color='#F18727').encode( alt.Y('average(Miles_per_Gallon):Q').axis(titleColor='#F18727') ) alt.layer(line_A, line_B).resolve_scale(y='independent') In this case the axis colors act as a pseudo-legend. Alternatively if you want a legend the :ref:`user-guide-filter-transform` and :ref:`user-guide-fold-transform` must be applied. Legends are only created in Vega-Lite to represent an encoding. .. altair-plot:: base = alt.Chart(source).mark_line().transform_fold( ['Horsepower', 'Miles_per_Gallon'], as_=['Measure', 'Value'] ).encode( alt.Color('Measure:N'), alt.X('year(Year):T') ) line_A = base.transform_filter( alt.datum.Measure == 'Horsepower' ).encode( alt.Y('average(Value):Q').title('Horsepower') ) line_B = base.transform_filter( alt.datum.Measure == 'Miles_per_Gallon' ).encode( alt.Y('average(Value):Q').title('Miles_per_Gallon') ) alt.layer(line_A, line_B).resolve_scale(y='independent') Note that dual axis charts might be misleading about relationships in your data. For further reading on the topic see `The case against dual axis charts `__ by Lisa Charlotte Rost. ================================================ FILE: doc/user_guide/times_and_dates.rst ================================================ .. currentmodule:: altair .. _user-guide-time: Times & Dates ============= Working with dates, times, and timezones is often one of the more challenging aspects of data analysis. In Altair, the difficulties are compounded by the fact that users are writing Python code, which outputs JSON-serialized timestamps, which are interpreted by Javascript, and then rendered by your browser. 
At each of these steps, there are things that can go wrong, but Altair and Vega-Lite do their best to ensure that dates are interpreted and visualized in a consistent way. Altair and pandas Datetimes --------------------------- Altair is designed to work best with `pandas timeseries`_. A standard timezone-agnostic date/time column in a pandas dataframe will be both interpreted and displayed as local user time. For example, here is a dataset containing hourly temperatures measured in Seattle: .. altair-plot:: :output: repr import altair as alt from altair.datasets import data temps = data.seattle_weather_hourly_normals() temps.head() We can see from the ``dtypes`` attribute that the times are encoded as a standard 64-bit datetime, without any specified timezone: .. altair-plot:: :output: repr temps.dtypes We can use Altair to visualize this datetime data; for clarity in this example, we'll limit ourselves to the first two weeks of data: .. altair-plot:: temps = temps[temps.date < '2010-01-15'] alt.Chart(temps).mark_line().encode( x='date:T', y='temperature:Q' ) Notice that for date/time values we use the ``T`` to indicate a temporal encoding: while this is optional for pandas datetime input, it is good practice to specify a type explicitly; see :ref:`encoding-data-types` for more discussion. If you want Altair to plot four digit integers as years, you need to cast them as strings before changing the data type to temporal, please see the :ref:`type-axis-scale` for details. For date-time inputs like these, it can sometimes be useful to extract particular time units (e.g. hours of the day, dates of the month, etc.). In Altair, this can be done with a time unit transform, discussed in detail in :ref:`user-guide-timeunit-transform`. For example, we might decide we want a heatmap with hour of the day on the x-axis, and day of the month on the y-axis: .. 
altair-plot:: alt.Chart(temps).mark_rect().encode( alt.X('hoursminutes(date):O').title('hour of day'), alt.Y('monthdate(date):O').title('date'), alt.Color('temperature:Q').title('temperature (C)') ) Unless you are using a non-ES6 browser (See :ref:`note-browser-compliance`), you will notice that the chart created by this code reflects hours starting at 00:00:00 on January 1st, just as in the data we input. This is because both the input timestamps and the plot outputs are using local time. Specifying Time Zones --------------------- If you are viewing the above visualizations in a supported browser (see :ref:`note-browser-compliance`), the times are both serialized and rendered in local time, so that the ``January 1st 00:00:00`` row renders in the chart as ``00:00`` on ``January 1st``. In Altair, simple dates without an explicit timezone are treated as local time, and in Vega-Lite, unless otherwise specified, times are rendered in the local time of the browser that does the rendering. If you would like your dates to instead be time-zone aware, you can set the timezone explicitly in the input dataframe. Since Seattle is in the ``US/Pacific`` timezone, we can localize the timestamps in pandas as follows: .. altair-plot:: :output: repr temps['date_pacific'] = temps['date'].dt.tz_localize('US/Pacific') temps.dtypes Notice that the timezone is now part of the pandas datatype. If we repeat the above chart with this timezone-aware data, the result will render **according to the timezone of the browser rendering it**: .. altair-plot:: alt.Chart(temps).mark_rect().encode( alt.X('hoursminutes(date_pacific):O').title('hour of day'), alt.Y('monthdate(date_pacific):O').title('date'), alt.Color('temperature:Q').title('temperature (C)') ) If you are viewing this chart on a computer whose time is set to the west coast of the US, it should appear identical to the first version. 
If you are rendering the chart in any other timezone, it will render using a timezone correction computed from the location set in your system. .. _explicit-utc-time: Using UTC Time -------------- This user-local rendering can sometimes be confusing, because it leads to the same output being visualized differently by different users. If you want timezone-aware data to appear the same to every user regardless of location, the best approach is to adopt a standard timezone in which to render the data. One commonly-used standard is `Coordinated Universal Time (UTC)`_. In Altair, any of the ``timeUnit`` bins can be prefixed with ``utc`` in order to extract UTC time units. Here is the above chart visualized in UTC time, which will render the same way regardless of the system location: .. altair-plot:: alt.Chart(temps).mark_rect().encode( alt.X('utchoursminutes(date_pacific):O').title('UTC hour of day'), alt.Y('utcmonthdate(date_pacific):O').title('UTC date'), alt.Color('temperature:Q').title('temperature (C)') ) To make your charts as portable as possible (even in non-ES6 browsers which parse timezone-agnostic times as UTC), you can explicitly work in UTC time, both on the pandas side and on the Vega-Lite side: .. altair-plot:: temps['date_utc'] = temps['date'].dt.tz_localize('UTC') alt.Chart(temps).mark_rect().encode( alt.X('utchoursminutes(date_utc):O').title('hour of day'), alt.Y('utcmonthdate(date_utc):O').title('date'), alt.Color('temperature:Q').title('temperature (C)') ) This is somewhat less convenient than the default behavior for timezone-agnostic dates, in which both pandas and Vega-Lite assume times are local (except in non-ES6 browsers; see :ref:`note-browser-compliance`), but it gets around browser incompatibilities by explicitly working in UTC, which gives similar results even in older browsers. .. _note-browser-compliance: Note on Browser Compliance -------------------------- .. 
note:: Warning about non-ES6 Browsers The discussion below applies to modern browsers which support `ECMAScript 6`_, in which time strings like ``"2018-01-01T12:00:00"`` without a trailing ``"Z"`` are treated as local time rather than `Coordinated Universal Time (UTC)`_. For example, recent versions of Chrome and Firefox are ES6-compliant, while Safari 11 is not. If you are using a non-ES6 browser, this means that times displayed in Altair charts may be rendered with a timezone offset, unless you explicitly use UTC time (see :ref:`explicit-utc-time`). The following chart will help you determine if your browser parses dates in the way that Altair expects: .. altair-plot:: :links: none import altair as alt import pandas as pd df = pd.DataFrame({'local': ['2018-01-01T00:00:00'], 'utc': ['2018-01-01T00:00:00Z']}) when_compliant = alt.when(compliant=True) alt.Chart(df).transform_calculate( compliant="hours(datum.local) != hours(datum.utc) ? true : false", ).mark_text(size=20, baseline="middle").encode( text=when_compliant.then(alt.value("OK")).otherwise(alt.value("not OK")), color=when_compliant.then(alt.value("green")).otherwise(alt.value("red")), ).properties(width=80, height=50) If the above output contains a red "not OK": .. altair-plot:: :hide-code: :links: none alt.Chart(df).mark_text(size=10, baseline='middle').encode( alt.TextValue('not OK'), alt.ColorValue('red') ).properties(width=40, height=25) it means that your browser's date parsing is not ES6-compliant. If it contains a green "OK": .. altair-plot:: :hide-code: :links: none alt.Chart(df).mark_text(size=10, baseline='middle').encode( alt.TextValue('OK'), alt.ColorValue('green') ).properties(width=40, height=25) then it means that your browser parses dates as Altair expects, either because it is ES6-compliant or because your computer locale happens to be set to the UTC+0 (GMT) timezone. .. _Coordinated Universal Time (UTC): https://en.wikipedia.org/wiki/Coordinated_Universal_Time .. 
_pandas timeseries: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html .. _ECMAScript 6: http://www.ecma-international.org/ecma-262/6.0/ ================================================ FILE: doc/user_guide/transform/aggregate.rst ================================================ .. currentmodule:: altair .. _user-guide-aggregate-transform: Aggregate ~~~~~~~~~ There are two ways to aggregate data within Altair: within the encoding itself, or using a top level aggregate transform. The aggregate property of a field definition can be used to compute aggregate summary statistics (e.g., :code:`median`, :code:`min`, :code:`max`) over groups of data. If any field in the specified encoding channels contains an aggregate, the resulting visualization will show aggregate data. In this case, all fields without a specified aggregation function are treated as group-by fields in the aggregation process. For example, the following bar chart aggregates mean of ``acceleration``, grouped by the number of Cylinders. .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url alt.Chart(cars).mark_bar().encode( y='Cylinders:O', x='mean(Acceleration):Q', ) The Altair shorthand string:: # ... x='mean(Acceleration):Q', # ... is made available for convenience, and is equivalent to the longer form:: # ... x=alt.X(field='Acceleration', aggregate='mean', type='quantitative'), # ... For more information on shorthand encodings specifications, see :ref:`shorthand-description`. The same plot can be shown via an explicitly computed aggregation, using the :meth:`~Chart.transform_aggregate` method: .. altair-plot:: alt.Chart(cars).mark_bar().encode( y='Cylinders:O', x='mean_acc:Q' ).transform_aggregate( mean_acc='mean(Acceleration)', groupby=["Cylinders"] ) The alternative to using aggregate functions is to preprocess the data with Pandas, and then plot the resulting DataFrame: .. 
altair-plot:: cars_df = data.cars() source = ( cars_df.groupby('Cylinders') .Acceleration .mean() .reset_index() .rename(columns={'Acceleration': 'mean_acc'}) ) alt.Chart(source).mark_bar().encode( y='Cylinders:O', x='mean_acc:Q' ) .. note:: Altair transforms are great for quick exploration, while upfront analysis using dedicated dataframe libraries can be faster for large datasets. See :doc:`../data_transformers` for details. Because :code:`Cylinders` is of type :code:`int64` in the :code:`source` DataFrame, Altair would have treated it as a :code:`qualitative` --instead of :code:`ordinal`-- type, had we not specified it. Making the type of data explicit is important since it affects the resulting plot; see :ref:`type-legend-scale` and :ref:`type-axis-scale` for two illustrated examples. As a rule of thumb, it is better to make the data type explicit, instead of relying on an implicit type conversion. Functions Without Arguments ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Aggregate functions can be used without arguments. In such cases, the function operates directly on the input objects and returns the same value regardless of the provided field. The following chart demonstrates this by counting the number of cars with respect to their country of origin. .. altair-plot:: alt.Chart(cars).mark_bar().encode( y='Origin:N', # shorthand form of alt.X(aggregate='count') x='count()' ) .. note:: The :code:`count` aggregate function is of type :code:`quantitative` by default, it does not matter if the source data is a DataFrame, URL pointer, CSV file or JSON file. Functions that handle categorical data (such as :code:`count`, :code:`missing`, :code:`distinct` and :code:`valid`) are the ones that get the most out of this feature. Argmin and Argmax Functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ The :code:`argmin` and :code:`argmax` functions help you find values from one field that correspond to the minimum or maximum values in another field. 
For example, you might want to find the production budget of movies that earned the highest gross revenue in each genre. These functions must be used with the :meth:`~Chart.transform_aggregate` method rather than their shorthand notations. They return objects that act as selectors for values in other columns, rather than returning values directly. You can think of the returned object as a dictionary where the column serves as a key to retrieve corresponding values. To illustrate this, let's compare the weights of cars with the highest horsepower across different regions of origin: .. altair-plot:: alt.Chart(cars).mark_bar().encode( x='greatest_hp[Weight_in_lbs]:Q', y='Origin:N' ).transform_aggregate( greatest_hp='argmax(Horsepower)', groupby=['Origin'] ) This visualization reveals an interesting contrast: among cars with the highest horsepower in their respective regions, Japanese cars are notably lighter, while American cars are substantially heavier. See :ref:`gallery_line_chart_with_custom_legend` for another example that uses :code:`argmax`. The case of :code:`argmin` is completely similar. Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_aggregate` method is built on the :class:`~AggregateTransform` class, which has the following options: .. altair-object-table:: altair.AggregateTransform The :class:`~AggregatedFieldDef` objects have the following options: .. altair-object-table:: altair.AggregatedFieldDef .. 
_agg-func-table: Aggregation Functions ^^^^^^^^^^^^^^^^^^^^^ In addition to ``count`` and ``average``, there are a large number of available aggregation functions built into Altair; they are listed in the following tables: Count-related Functions """"""""""""""""""""""" ========= =========================================================================== ===================================== Aggregate Description Example ========= =========================================================================== ===================================== count The total count of data objects in the group. :ref:`gallery_simple_heatmap` valid The count of field values that are not null or undefined. N/A missing The count of null or undefined field values. N/A distinct The count of distinct field values. N/A values A list of data objects in the group. N/A ========= =========================================================================== ===================================== Basic Mathematical Operations """"""""""""""""""""""""""""" ========= =========================================================================== ===================================== Aggregate Description Example ========= =========================================================================== ===================================== sum The sum of field values. :ref:`gallery_streamgraph` product The product of field values. N/A ========= =========================================================================== ===================================== Central Tendency Measures """"""""""""""""""""""""" ========= =========================================================================== ===================================== Aggregate Description Example ========= =========================================================================== ===================================== mean The mean (average) field value. :ref:`gallery_scatter_with_layered_histogram` average The mean (average) field value. 
Identical to mean. :ref:`gallery_layer_line_color_rule` median The median field value :ref:`gallery_boxplot` variance The sample variance of field values. N/A variancep The population variance of field values. N/A stdev The sample standard deviation of field values. N/A stdevp The population standard deviation of field values. N/A stderr The standard error of the field values. N/A ========= =========================================================================== ===================================== Distribution Statistics """"""""""""""""""""""" ========= =========================================================================== ===================================== Aggregate Description Example ========= =========================================================================== ===================================== q1 The lower quartile boundary of values. :ref:`gallery_boxplot` q3 The upper quartile boundary of values. :ref:`gallery_boxplot` ci0 The lower boundary of the bootstrapped 95% confidence interval of the mean. :ref:`gallery_sorted_error_bars_with_ci` ci1 The upper boundary of the bootstrapped 95% confidence interval of the mean. :ref:`gallery_sorted_error_bars_with_ci` ========= =========================================================================== ===================================== Range Functions """"""""""""""" ========= =========================================================================== ===================================== Aggregate Description Example ========= =========================================================================== ===================================== min The minimum field value. :ref:`gallery_boxplot` max The maximum field value. :ref:`gallery_boxplot` argmin An input data object containing the minimum field value. N/A argmax An input data object containing the maximum field value. 
:ref:`gallery_line_chart_with_custom_legend` ========= =========================================================================== ===================================== ================================================ FILE: doc/user_guide/transform/bin.rst ================================================ .. currentmodule:: altair .. _user-guide-bin-transform: Bin ~~~ As with :ref:`user-guide-aggregate-transform`, there are two ways to apply a bin transform in Altair: within the encoding itself, or using a top-level bin transform. An common application of a bin transform is when creating a histogram: .. altair-plot:: import altair as alt from altair.datasets import data movies = data.movies.url alt.Chart(movies).mark_bar().encode( alt.X("IMDB Rating:Q").bin(), y='count()', ) But a bin transform can be useful in other applications; for example, here we bin a continuous field to create a discrete color map: .. altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.Color('Acceleration:Q').bin(maxbins=5) ) In the first case we use ``bin()`` without any arguments, which uses the default bin settings. In the second case, we exercise more fine-tuned control over the bin parameters by passing the ``maxbins`` argument. If you are using the same bins in multiple chart components, it can be useful to instead define the binning at the top level, using :meth:`~Chart.transform_bin` method. Here is the above histogram created using a top-level bin transform: .. altair-plot:: import altair as alt from altair.datasets import data movies = data.movies.url alt.Chart(movies).mark_bar().encode( x='binned_rating:O', y='count()', ).transform_bin( 'binned_rating', field='IMDB Rating' ) And here is the transformed color scale using a top-level bin transform: .. 
altair-plot:: import altair as alt from altair.datasets import data cars = data.cars.url alt.Chart(cars).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='binned_acc:O' ).transform_bin( 'binned_acc', 'Acceleration', bin=alt.Bin(maxbins=5) ) The advantage of the top-level transform is that the same named field can be used in multiple places in the chart if desired. Note the slight difference in binning behavior between the encoding-based bins (which preserve the range of the bins) and the transform-based bins (which collapse each bin to a single representative value). Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_bin` method is built on the :class:`~BinTransform` class, which has the following options: .. altair-object-table:: altair.BinTransform ================================================ FILE: doc/user_guide/transform/calculate.rst ================================================ .. currentmodule:: altair .. _user-guide-calculate-transform: Calculate ~~~~~~~~~ The calculate transform allows the user to define new fields in the dataset which are calculated from other fields using an expression syntax. As a simple example, here we take data with a simple input sequence, and compute some trigonometric quantities: .. altair-plot:: import altair as alt import pandas as pd data = pd.DataFrame({'t': range(101)}) alt.Chart(data).mark_line().encode( x='x:Q', y='y:Q', order='t:Q' ).transform_calculate( x='cos(datum.t * PI / 50)', y='sin(datum.t * PI / 25)' ) Each argument within ``transform_calculate`` is a `Vega expression`_ string, which is a well-defined set of JavaScript-style operations that can be used to calculate a new field from an existing one. To streamline building these Vega expressions in Python, Altair provides the :mod:`expr` module which provides constants and functions to allow these expressions to be constructed with Python syntax; for example: ..
altair-plot:: alt.Chart(data).mark_line().encode( x='x:Q', y='y:Q', order='t:Q' ).transform_calculate( x=alt.expr.cos(alt.datum.t * alt.expr.PI / 50), y=alt.expr.sin(alt.datum.t * alt.expr.PI / 25) ) Altair expressions are designed to output valid Vega expressions. The benefit of using them is that proper syntax is ensured by the Python interpreter, and tab completion of the :mod:`~expr` submodule can be used to explore the available functions and constants. These expressions can also be used when constructing a :ref:`user-guide-filter-transform`, as we shall see next. Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_calculate` method is built on the :class:`~CalculateTransform` class, which has the following options: .. altair-object-table:: altair.CalculateTransform .. _Vega expression: https://vega.github.io/vega/docs/expressions/ ================================================ FILE: doc/user_guide/transform/density.rst ================================================ .. currentmodule:: altair .. _user-guide-density-transform: Density ~~~~~~~ The density transform performs one-dimensional `kernel density estimation <https://en.wikipedia.org/wiki/Kernel_density_estimation>`_ over input data and generates a new column of samples of the estimated densities. Here is a simple example, showing the distribution of IMDB ratings from the movies dataset: .. altair-plot:: import altair as alt from altair.datasets import data alt.Chart(data.movies.url).transform_density( 'IMDB Rating', as_=['IMDB Rating', 'density'], ).mark_area().encode( x="IMDB Rating:Q", y='density:Q', ) The density can also be computed on a per-group basis, by specifying the ``groupby`` argument. Here we split the above density computation across movie genres: ..
altair-plot:: import altair as alt from altair.datasets import data alt.Chart( data.movies.url, width=120, height=80 ).transform_filter( 'isValid(datum.Major_Genre)' ).transform_density( 'IMDB Rating', groupby=['Major_Genre'], as_=['IMDB Rating', 'density'], extent=[1, 10], ).mark_area().encode( x="IMDB Rating:Q", y='density:Q', ).facet( 'Major_Genre:N', columns=4 ) Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_density` method is built on the :class:`~DensityTransform` class, which has the following options: .. altair-object-table:: altair.DensityTransform ================================================ FILE: doc/user_guide/transform/extent.rst ================================================ .. currentmodule:: altair .. _user-guide-extent-transform: Extent ~~~~~~ The extent transform can be used to find the extent of a field and stores the result in a :ref:`parameter `. As an example, consider the following dataset: .. altair-plot:: :output: none import pandas as pd df = pd.DataFrame( [ {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43}, {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53}, {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52}, ] ) We can use the extent transform to extract the minimum and maximum values of column ``b`` and then use those values to place rules: .. altair-plot:: import altair as alt base = alt.Chart(df, title="A Simple Bar Chart with Lines at Extents").transform_extent( extent="b", param="b_extent" ) bars = base.mark_bar().encode(x="b", y="a") lower_extent_rule = base.mark_rule(stroke="firebrick").encode( x=alt.value(alt.expr("scale('x', b_extent[0])")) ) upper_extent_rule = base.mark_rule(stroke="firebrick").encode( x=alt.value(alt.expr("scale('x', b_extent[1])")) ) bars + lower_extent_rule + upper_extent_rule Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_extent` method is built on the :class:`~ExtentTransform` class, which has the following options: .. 
altair-object-table:: altair.ExtentTransform ================================================ FILE: doc/user_guide/transform/filter.rst ================================================ .. currentmodule:: altair .. _user-guide-filter-transform: Filter ~~~~~~ The filter transform removes objects from a data stream based on a provided filter expression, selection, or other filter predicate. A filter can be added at the top level of a chart using the :meth:`Chart.transform_filter` method. The argument to ``transform_filter`` can be one of a number of expressions and objects: 1. A `Vega expression`_ expressed as a string or built using the :mod:`~expr` module 2. A Field predicate, such as :class:`~FieldOneOfPredicate`, :class:`~FieldRangePredicate`, :class:`~FieldEqualPredicate`, :class:`~FieldLTPredicate`, :class:`~FieldGTPredicate`, :class:`~FieldLTEPredicate`, :class:`~FieldGTEPredicate` 3. A Selection predicate or object created by :func:`selection` 4. A Logical operand that combines any of the above We'll show a brief example of each of these in the following sections. .. _filter-expression: Filter Expression ^^^^^^^^^^^^^^^^^ A filter expression uses the `Vega expression`_ language, either specified directly as a string, or built using the :mod:`~expr` module. This can be useful when, for example, selecting only a subset of data. For example: .. altair-plot:: import altair as alt from altair import datum from altair.datasets import data pop = data.population.url alt.Chart(pop).mark_area().encode( x='age:O', y='people:Q', ).transform_filter( (datum.year == 2000) & (datum.sex == 1) ) Notice that, like in the :ref:`user-guide-calculate-transform`, data values are referenced via the name ``datum``. Field Predicates ^^^^^^^^^^^^^^^^ Field predicates overlap somewhat in function with expression predicates, but have the advantage that their contents are validated by the schema.
Examples are: - :class:`~FieldEqualPredicate` evaluates whether a field is equal to a particular value - :class:`~FieldOneOfPredicate` evaluates whether a field is among a list of specified values. - :class:`~FieldRangePredicate` evaluates whether a continuous field is within a range of values. - :class:`~FieldLTPredicate` evaluates whether a continuous field is less than a given value - :class:`~FieldGTPredicate` evaluates whether a continuous field is greater than a given value - :class:`~FieldLTEPredicate` evaluates whether a continuous field is less than or equal to a given value - :class:`~FieldGTEPredicate` evaluates whether a continuous field is greater than or equal to a given value Here is an example of a :class:`~FieldEqualPredicate` used to select just the values from year 2000 as in the above chart: .. altair-plot:: import altair as alt from altair.datasets import data pop = data.population.url alt.Chart(pop).mark_line().encode( x='age:O', y='sum(people):Q', color='year:O' ).transform_filter( alt.FieldEqualPredicate(field='year', equal=2000) ) A :class:`~FieldOneOfPredicate` is similar, but allows selection of any number of specific values: .. altair-plot:: import altair as alt from altair.datasets import data pop = data.population.url alt.Chart(pop).mark_line().encode( x='age:O', y='sum(people):Q', color='year:O' ).transform_filter( alt.FieldOneOfPredicate(field='year', oneOf=[1900, 1950, 2000]) ) Finally, a :class:`~FieldRangePredicate` allows selecting values within a particular continuous range: .. altair-plot:: import altair as alt from altair.datasets import data pop = data.population.url alt.Chart(pop).mark_line().encode( x='age:O', y='sum(people):Q', color='year:O' ).transform_filter( alt.FieldRangePredicate(field='year', range=[1960, 2000]) ) Selection Predicates ^^^^^^^^^^^^^^^^^^^^ Selection predicates can be used to filter data based on a selection.
While these can be constructed directly using a :class:`~SelectionPredicate` class, in Altair it is often more convenient to construct them using the :func:`~selection` function. For example, this chart uses a multi-selection that allows the user to click or shift-click on the bars in the bottom chart to select the data to be shown in the top chart: .. altair-plot:: import altair as alt from altair.datasets import data pop = data.population.url selection = alt.selection_point(fields=['year']) top = alt.Chart(width=600, height=200).mark_line().encode( x="age:O", y="sum(people):Q", color="year:O" ).transform_filter( selection ) color = alt.when(selection).then(alt.value("steelblue")).otherwise(alt.value("lightgray")) bottom = alt.Chart(width=600, height=100).mark_bar().encode( x="year:O", y="sum(people):Q", color=color ).add_params( selection ) alt.vconcat(top, bottom, data=pop) Logical Operands ^^^^^^^^^^^^^^^^ At times it is useful to combine several types of predicates into a single selection. We can use ``&``, ``|`` and ``~`` for respectively ``AND``, ``OR`` and ``NOT`` logical composition operands. For example, here we wish to plot US population distributions for all data *except* the years *1950-1960*. First, we use a :class:`~FieldRangePredicate` to select *1950-1960*: .. altair-plot:: :output: none import altair as alt from altair.datasets import data source = data.population.url chart = alt.Chart(source).mark_line().encode( x="age:O", y="sum(people):Q", color="year:O" ).properties( width=600, height=200 ) between_1950_60 = alt.FieldRangePredicate(field="year", range=[1950, 1960]) Then, we can *invert* this selection using ``~``: .. altair-plot:: # NOT between 1950-1960 chart.transform_filter(~between_1950_60) We can further refine our filter by *composing* multiple predicates together. In this case, using ``datum``: .. 
altair-plot:: chart.transform_filter(~between_1950_60 & (datum.age <= 70)) When passing multiple predicates they will be reduced with ``&``: .. altair-plot:: chart.transform_filter(datum.year > 1980, datum.age != 90) Using keyword-argument ``constraints`` can simplify our first example in :ref:`filter-expression`: .. altair-plot:: alt.Chart(source).mark_area().encode( x="age:O", y="people:Q", ).transform_filter(year=2000, sex=1) Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_filter` method is built on the :class:`~FilterTransform` class, which has the following options: .. altair-object-table:: altair.FilterTransform .. _Vega expression: https://vega.github.io/vega/docs/expressions/ ================================================ FILE: doc/user_guide/transform/flatten.rst ================================================ .. currentmodule:: altair .. _user-guide-flatten-transform: Flatten ~~~~~~~ The flatten transform can be used to extract the contents of arrays from data entries. This will not generally be useful for well-structured data within pandas dataframes, but it can be useful for working with data from other sources. As an example, consider this dataset which uses a common convention in JSON data, a set of fields each containing a list of entries: .. altair-plot:: :output: none import numpy as np rand = np.random.RandomState(0) def generate_data(N): mean = rand.randn() std = rand.rand() return list(rand.normal(mean, std, N)) data = [ {'label': 'A', 'values': generate_data(20)}, {'label': 'B', 'values': generate_data(30)}, {'label': 'C', 'values': generate_data(40)}, {'label': 'D', 'values': generate_data(50)}, ] This kind of data structure does not work well in the context of dataframe representations, as we can see by loading this into pandas: .. 
altair-plot:: :output: repr import pandas as pd df = pd.DataFrame.from_records(data) df Altair's flatten transform allows you to extract the contents of these arrays into a column that can be referenced by an encoding: .. altair-plot:: import altair as alt alt.Chart(df).transform_flatten( ['values'] ).mark_tick().encode( x='values:Q', y='label:N', ) This can be particularly useful in cleaning up data specified via a JSON URL, without having to first load the data for manipulation in pandas. Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_flatten` method is built on the :class:`~FlattenTransform` class, which has the following options: .. altair-object-table:: altair.FlattenTransform ================================================ FILE: doc/user_guide/transform/fold.rst ================================================ .. currentmodule:: altair .. _user-guide-fold-transform: Fold ~~~~ The fold transform is, in short, a way to convert wide-form data to long-form data directly without any preprocessing (see :ref:`data-long-vs-wide` for more information). Fold transforms are the opposite of the :ref:`user-guide-pivot-transform`. So, for example, if your data consist of multiple columns that record parallel data for different categories, you can use the fold transform to encode based on those categories: .. altair-plot:: import numpy as np import pandas as pd import altair as alt rand = np.random.RandomState(0) data = pd.DataFrame({ 'date': pd.date_range('2019-01-01', freq='D', periods=30), 'A': rand.randn(30).cumsum(), 'B': rand.randn(30).cumsum(), 'C': rand.randn(30).cumsum(), }) alt.Chart(data).transform_fold( ['A', 'B', 'C'], ).mark_line().encode( x='date:T', y='value:Q', color='key:N' ) Notice here that the fold transform essentially stacks all the values from the specified columns into a single new field named ``"value"``, with the associated names in a field named ``"key"``.
For an example of the fold transform in action, see :ref:`gallery_parallel_coordinates`. Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_fold` method is built on the :class:`~FoldTransform` class, which has the following options: .. altair-object-table:: altair.FoldTransform ================================================ FILE: doc/user_guide/transform/impute.rst ================================================ .. currentmodule:: altair .. _user-guide-impute-transform: Impute ~~~~~~ The impute transform allows you to fill-in missing entries in a dataset. As an example, consider the following data, which includes missing values that we filter-out of the long-form representation (see :ref:`data-long-vs-wide` for more on this): .. altair-plot:: :output: repr import numpy as np import pandas as pd data = pd.DataFrame({ 't': range(5), 'x': [2, np.nan, 3, 1, 3], 'y': [5, 7, 5, np.nan, 4] }).melt('t').dropna() data Notice the result: the ``x`` series has no entry at ``t=1``, and the ``y`` series has a missing entry at ``t=3``. If we use Altair to visualize this data directly, the line skips the missing entries: .. altair-plot:: import altair as alt raw = alt.Chart(data).mark_line(point=True).encode( x='t:Q', y='value:Q', color='variable:N' ) raw This is not always desirable, because (particularly for a line plot with no points) it can imply the existence of data that is not there. Impute via Encodings ^^^^^^^^^^^^^^^^^^^^ To address this, you can use the impute method of the encoding channel. For example, we can impute using a constant value (we'll show the raw chart lightly in the background for reference): .. altair-plot:: background = raw.encode(opacity=alt.value(0.2)) chart = alt.Chart(data).mark_line(point=True).encode( x='t:Q', y=alt.Y('value:Q').impute(value=0), color='variable:N' ) background + chart Or we can impute using any supported aggregate: .. 
altair-plot:: chart = alt.Chart(data).mark_line(point=True).encode( x='t:Q', y=alt.Y('value:Q').impute(method='mean'), color='variable:N' ) background + chart Impute via Transform ^^^^^^^^^^^^^^^^^^^^ Similar to the :ref:`user-guide-bin-transform` and :ref:`user-guide-aggregate-transform`, it is also possible to specify the impute transform outside the encoding as a transform. For example, here is the equivalent of the above two charts: .. altair-plot:: chart = alt.Chart(data).transform_impute( impute='value', key='t', value=0, groupby=['variable'] ).mark_line(point=True).encode( x='t:Q', y='value:Q', color='variable:N' ) background + chart .. altair-plot:: chart = alt.Chart(data).transform_impute( impute='value', key='t', method='mean', groupby=['variable'] ).mark_line(point=True).encode( x='t:Q', y='value:Q', color='variable:N' ) background + chart If you would like to use more localized imputed values, you can specify a ``frame`` parameter similar to the :ref:`user-guide-window-transform` that will control which values are used for the imputation. For example, here we impute missing values using the mean of the neighboring points on either side: .. altair-plot:: chart = alt.Chart(data).transform_impute( impute='value', key='t', method='mean', frame=[-1, 1], groupby=['variable'] ).mark_line(point=True).encode( x='t:Q', y='value:Q', color='variable:N' ) background + chart Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_impute` method is built on the :class:`~ImputeTransform` class, which has the following options: .. altair-object-table:: altair.ImputeTransform ================================================ FILE: doc/user_guide/transform/index.rst ================================================ .. currentmodule:: altair .. _user-guide-transformations: Data Transformations -------------------- It is often necessary to transform or filter data in the process of visualizing it. In Altair you can do this one of two ways: 1. 
Before the chart definition, using standard pandas data transformations. 2. Within the chart definition, using Vega-Lite's data transformation tools. In most cases, we suggest that you use the first approach, because it is more straightforward to those who are familiar with data manipulation in Python, and because the pandas package offers much more flexibility than Vega-Lite in available data manipulations. The second approach becomes useful when the data source is not a dataframe, but, for example, a URL pointer to a JSON or CSV file. It can also be useful in a compound chart where different views of the dataset require different transformations. This second approach -- specifying data transformations within the chart specification itself -- can be accomplished using the ``transform_*`` methods of top-level objects: ========================================= ========================================= ================================================================================ Transform Method Description ========================================= ========================================= ================================================================================ :ref:`user-guide-aggregate-transform` :meth:`~Chart.transform_aggregate` Create a new data column by aggregating an existing column. :ref:`user-guide-bin-transform` :meth:`~Chart.transform_bin` Create a new data column by binning an existing column. :ref:`user-guide-calculate-transform` :meth:`~Chart.transform_calculate` Create a new data column using an arithmetic calculation on an existing column. :ref:`user-guide-density-transform` :meth:`~Chart.transform_density` Create a new data column with the kernel density estimate of the input. :ref:`user-guide-extent-transform` :meth:`~Chart.transform_extent` Find the extent of a field and store the result in a parameter. :ref:`user-guide-filter-transform` :meth:`~Chart.transform_filter` Select a subset of data based on a condition. 
:ref:`user-guide-flatten-transform` :meth:`~Chart.transform_flatten` Flatten array data into columns. :ref:`user-guide-fold-transform` :meth:`~Chart.transform_fold` Convert wide-form data into long-form data (opposite of pivot). :ref:`user-guide-impute-transform` :meth:`~Chart.transform_impute` Impute missing data. :ref:`user-guide-joinaggregate-transform` :meth:`~Chart.transform_joinaggregate` Aggregate transform joined to original data. :ref:`user-guide-loess-transform` :meth:`~Chart.transform_loess` Create a new column with LOESS smoothing of data. :ref:`user-guide-lookup-transform` :meth:`~Chart.transform_lookup` One-sided join of two datasets based on a lookup key. :ref:`user-guide-pivot-transform` :meth:`~Chart.transform_pivot` Convert long-form data into wide-form data (opposite of fold). :ref:`user-guide-quantile-transform` :meth:`~Chart.transform_quantile` Compute empirical quantiles of a dataset. :ref:`user-guide-regression-transform` :meth:`~Chart.transform_regression` Fit a regression model to a dataset. :ref:`user-guide-sample-transform` :meth:`~Chart.transform_sample` Random sub-sample of the rows in the dataset. :ref:`user-guide-stack-transform` :meth:`~Chart.transform_stack` Compute stacked version of values. :ref:`user-guide-timeunit-transform` :meth:`~Chart.transform_timeunit` Discretize/group a date by a time unit (day, month, year, etc.) :ref:`user-guide-window-transform` :meth:`~Chart.transform_window` Compute a windowed aggregation ========================================= ========================================= ================================================================================ Accessing Transformed Data ~~~~~~~~~~~~~~~~~~~~~~~~~~ When charts are displayed, data transformations are performed in the browser by the Vega JavaScript library. It's often helpful to inspect transformed data results in the process of building a chart. 
One approach is to display the transformed data results in a table composed of :ref:`Text <user-guide-text-marks>` marks as in the :ref:`gallery_scatter_linked_table` gallery example. While this approach works, it's somewhat cumbersome, and still does not make it possible to access the transformed data from Python. To make transformed data results available in Python, Altair provides the :meth:`~Chart.transformed_data` Chart method which integrates with `VegaFusion <https://vegafusion.io/>`_ to evaluate data transformations in the Python kernel. First, install VegaFusion with the embed extras enabled. .. code-block:: none pip install "vegafusion[embed]" Then create an Altair chart and call the :meth:`~Chart.transformed_data` method to extract a pandas DataFrame containing the transformed data. .. altair-plot:: :output: repr import altair as alt from altair.datasets import data cars = data.cars.url chart = alt.Chart(cars).mark_bar().encode( y='Cylinders:O', x='mean_acc:Q' ).transform_aggregate( mean_acc='mean(Acceleration)', groupby=["Cylinders"] ) chart.transformed_data() The :meth:`~Chart.transformed_data` method currently supports most, but not all, of Altair's transforms. See the table below.
========================================= ========= Transform Supported ========================================= ========= :ref:`user-guide-aggregate-transform` ✔ :ref:`user-guide-bin-transform` ✔ :ref:`user-guide-calculate-transform` ✔ :ref:`user-guide-density-transform` :ref:`user-guide-extent-transform` ✔ :ref:`user-guide-filter-transform` ✔ :ref:`user-guide-flatten-transform` :ref:`user-guide-fold-transform` ✔ :ref:`user-guide-impute-transform` ✔ :ref:`user-guide-joinaggregate-transform` ✔ :ref:`user-guide-loess-transform` :ref:`user-guide-lookup-transform` :ref:`user-guide-pivot-transform` ✔ :ref:`user-guide-quantile-transform` :ref:`user-guide-regression-transform` :ref:`user-guide-sample-transform` :ref:`user-guide-stack-transform` ✔ :ref:`user-guide-timeunit-transform` ✔ :ref:`user-guide-window-transform` ✔ ========================================= ========= .. toctree:: :hidden: aggregate bin calculate density extent filter flatten fold impute joinaggregate lookup loess pivot quantile regression sample stack timeunit window ================================================ FILE: doc/user_guide/transform/joinaggregate.rst ================================================ .. currentmodule:: altair .. _user-guide-joinaggregate-transform: Join Aggregate ~~~~~~~~~~~~~~ The Join Aggregate transform acts in almost every way the same as an Aggregate transform, but the resulting aggregate is joined to the original dataset. To make this more clear, consider the following dataset: .. altair-plot:: :output: repr import pandas as pd import numpy as np rand = np.random.RandomState(0) df = pd.DataFrame({ 'label': rand.choice(['A', 'B', 'C'], 10), 'value': rand.randn(10), }) df Here is a pandas operation that is equivalent to Altair's Aggregate transform, using the mean as an example: .. altair-plot:: :output: repr mean = df.groupby('label').mean().reset_index() mean And here is an output that is equivalent to Altair's Join Aggregate: .. 
altair-plot:: :output: repr pd.merge(df, mean, on='label', suffixes=['', '_mean']) Notice that the join aggregate joins the aggregated value with the original dataframe, such that the aggregated values can be used in tandem with the original values if desired. Here is an example of how the join aggregate might be used: we compare the IMDB and Rotten Tomatoes movie ratings, normalized by their mean and standard deviation, which requires calculations on the joined data: .. altair-plot:: import altair as alt from altair.datasets import data alt.Chart(data.movies.url).transform_filter( 'datum["IMDB Rating"] != null && datum["Rotten Tomatoes Rating"] != null' ).transform_joinaggregate( IMDB_mean='mean(IMDB Rating)', IMDB_std='stdev(IMDB Rating)', RT_mean='mean(Rotten Tomatoes Rating)', RT_std='stdev(Rotten Tomatoes Rating)' ).transform_calculate( IMDB_Deviation='(datum["IMDB Rating"] - datum.IMDB_mean) / datum.IMDB_std', Rotten_Tomatoes_Deviation='(datum["Rotten Tomatoes Rating"] - datum.RT_mean) / datum.RT_std' ).mark_point().encode( x='IMDB_Deviation:Q', y="Rotten_Tomatoes_Deviation:Q" ) Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_joinaggregate` method is built on the :class:`~JoinAggregateTransform` class, which has the following options: .. altair-object-table:: altair.JoinAggregateTransform ================================================ FILE: doc/user_guide/transform/loess.rst ================================================ .. currentmodule:: altair .. _user-guide-loess-transform: LOESS ~~~~~ The LOESS transform (LOcally Estimated Scatterplot Smoothing) uses a locally-estimated regression to produce a trend line. LOESS performs a sequence of local weighted regressions over a sliding window of nearest-neighbor points. For standard parametric regression options, see the :ref:`user-guide-regression-transform`. Here is an example of using LOESS to smooth samples from a Gaussian random walk: .. 
altair-plot:: import altair as alt import pandas as pd import numpy as np np.random.seed(42) df = pd.DataFrame({ 'x': range(100), 'y': np.random.randn(100).cumsum() }) chart = alt.Chart(df).mark_point().encode( x='x', y='y' ) chart + chart.transform_loess('x', 'y').mark_line() Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_loess` method is built on the :class:`~LoessTransform` class, which has the following options: .. altair-object-table:: altair.LoessTransform ================================================ FILE: doc/user_guide/transform/lookup.rst ================================================ .. currentmodule:: altair .. _user-guide-lookup-transform: Lookup ~~~~~~ The Lookup transform extends a primary data source by looking up values from another data source; it is similar to a one-sided join. A lookup can be added at the top level of a chart using the :meth:`Chart.transform_lookup` method. By way of example, imagine you have two sources of data that you would like to combine and plot: one is a list of names of people along with their height and weight, and the other is some information about which groups they belong to. This example data is available in the `altair.datasets` package: .. altair-plot:: :output: none from altair.datasets import data people = data.lookup_people() groups = data.lookup_groups() We know how to visualize each of these datasets separately; for example: .. altair-plot:: import altair as alt top = alt.Chart(people).mark_square(size=200).encode( x=alt.X('age:Q').scale(zero=False), y=alt.Y('height:Q').scale(zero=False), color='name:N', tooltip='name:N' ).properties( width=400, height=200 ) bottom = alt.Chart(groups).mark_rect().encode( x='person:N', y='group:O' ).properties( width=400, height=100 ) alt.vconcat(top, bottom) If we would like to plot features that reference both datasets (for example, the average age within each group), we need to combine the two datasets. 
This can be done either as a data preprocessing step, using tools available in pandas, or as part of the visualization using a :class:`~LookupTransform` in Altair. Combining Datasets with pandas.merge ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ pandas provides a wide range of tools for merging and joining datasets; see `Merge, Join, and Concatenate <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`_ for some detailed examples. For the above data, we can merge the data and create a combined chart as follows: .. altair-plot:: import pandas as pd merged = pd.merge(groups, people, how='left', left_on='person', right_on='name') alt.Chart(merged).mark_bar().encode( x='mean(age):Q', y='group:O' ) We specify a left join, meaning that for each entry of the "person" column in the groups, we seek the "name" column in people and add the entry to the data. From this, we can easily create a bar chart representing the mean age in each group. Combining Datasets with a Lookup Transform ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ For some data sources (e.g. data available at a URL, or data that is streaming), it is desirable to have a means of joining data without having to download it for pre-processing in pandas. This is where Altair's :meth:`~Chart.transform_lookup` comes in. To reproduce the above combined plot by combining datasets within the chart specification itself, we can do the following: .. altair-plot:: alt.Chart(groups).mark_bar().encode( x='mean(age):Q', y='group:O' ).transform_lookup( lookup='person', from_=alt.LookupData(data=people, key='name', fields=['age', 'height']) ) Here ``lookup`` names the field in the groups dataset on which we will match, and the ``from_`` argument specifies a :class:`~LookupData` structure where we supply the second dataset, the lookup key, and the fields we would like to extract.
Example: Lookup Transforms for Geographical Visualization ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Lookup transforms are often particularly important for geographic visualization, where it is common to combine tabular datasets with datasets that specify geographic boundaries to be visualized; for example, here is a visualization of unemployment rates per county in the US: .. altair-plot:: import altair as alt from altair.datasets import data counties = alt.topo_feature(data.us_10m.url, 'counties') unemp_data = data.unemployment.url alt.Chart(counties).mark_geoshape().encode( color='rate:Q' ).transform_lookup( lookup='id', from_=alt.LookupData(unemp_data, 'id', ['rate']) ).properties( projection={'type': 'albersUsa'}, width=500, height=300 ) Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_lookup` method is built on the :class:`~LookupTransform` class, which has the following options: .. altair-object-table:: altair.LookupTransform ================================================ FILE: doc/user_guide/transform/pivot.rst ================================================ .. currentmodule:: altair .. _user-guide-pivot-transform: Pivot ~~~~~ The pivot transform is, in short, a way to convert long-form data to wide-form data directly without any preprocessing (see :ref:`data-long-vs-wide` for more information). Pivot transforms are useful for creating matrix or cross-tabulation data, acting as an inverse to the :ref:`user-guide-fold-transform`. Here is an example, using Olympic medals data: .. 
altair-plot:: import altair as alt import pandas as pd df = pd.DataFrame.from_records([ {"country": "Norway", "type": "gold", "count": 14}, {"country": "Norway", "type": "silver", "count": 14}, {"country": "Norway", "type": "bronze", "count": 11}, {"country": "Germany", "type": "gold", "count": 14}, {"country": "Germany", "type": "silver", "count": 10}, {"country": "Germany", "type": "bronze", "count": 7}, {"country": "Canada", "type": "gold", "count": 11}, {"country": "Canada", "type": "silver", "count": 8}, {"country": "Canada", "type": "bronze", "count": 10} ]) alt.Chart(df).transform_pivot( 'type', groupby=['country'], value='count' ).mark_bar().encode( x='gold:Q', y='country:N', ) The pivot transform, when combined with other elements of the Altair grammar, enables some very interesting chart types. For example, here we use pivot to create a single tooltip for values on multiple lines: .. altair-plot:: import altair as alt from altair.datasets import data source = data.stocks() base = alt.Chart(source).encode(x='date:T') columns = sorted(source.symbol.unique()) selection = alt.selection_point( fields=['date'], nearest=True, on='pointerover', empty=False, clear='pointerout' ) lines = base.mark_line().encode(y='price:Q', color='symbol:N') points = lines.mark_point().transform_filter(selection) rule = base.transform_pivot( 'symbol', value='price', groupby=['date'] ).mark_rule().encode( opacity=alt.when(selection).then(alt.value(0.3)).otherwise(alt.value(0)), tooltip=[alt.Tooltip(c, type='quantitative') for c in columns] ).add_params(selection) lines + points + rule Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_pivot` method is built on the :class:`~PivotTransform` class, which has the following options: .. altair-object-table:: altair.PivotTransform ================================================ FILE: doc/user_guide/transform/quantile.rst ================================================ .. currentmodule:: altair .. 
_user-guide-quantile-transform: Quantile ~~~~~~~~ The quantile transform calculates empirical `quantile `_ values for input data. If a groupby parameter is provided, quantiles are estimated separately per group. Among other uses, the quantile transform is useful for creating `quantile-quantile (Q-Q) plots `_. Here is an example of a quantile plot of normally-distributed data: .. altair-plot:: import altair as alt import pandas as pd import numpy as np np.random.seed(42) df = pd.DataFrame({'x': np.random.randn(200)}) alt.Chart(df).transform_quantile( 'x', step=0.01 ).mark_point().encode( x='prob:Q', y='value:Q' ) Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_quantile` method is built on the :class:`~QuantileTransform` class, which has the following options: .. altair-object-table:: altair.QuantileTransform ================================================ FILE: doc/user_guide/transform/regression.rst ================================================ .. currentmodule:: altair .. _user-guide-regression-transform: Regression ~~~~~~~~~~ The regression transform fits two-dimensional regression models to smooth and predict data. This transform can fit multiple models for input data (one per group) and generates new data objects that represent points for summary trend lines. Alternatively, this transform can be used to generate a set of objects containing regression model parameters, one per group. This transform supports parametric models for the following functional forms: - linear (``linear``): *y = a + b * x* - logarithmic (``log``): *y = a + b * log(x)* - exponential (``exp``): *y = a * e^(b * x)* - power (``pow``): *y = a * x^b* - quadratic (``quad``): *y = a + b * x + c * x^2* - polynomial (``poly``): *y = a + b * x + … + k * x^(order)* All models are fit using ordinary least squares. For non-parametric locally weighted regression, see the :ref:`user-guide-loess-transform`. Here is an example of a simple linear regression plotted on top of data: .. 
altair-plot:: import altair as alt import pandas as pd import numpy as np np.random.seed(42) x = np.linspace(0, 10) y = x - 5 + np.random.randn(len(x)) df = pd.DataFrame({'x': x, 'y': y}) chart = alt.Chart(df).mark_point().encode( x='x', y='y' ) chart + chart.transform_regression('x', 'y').mark_line() Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_regression` method is built on the :class:`~RegressionTransform` class, which has the following options: .. altair-object-table:: altair.RegressionTransform ================================================ FILE: doc/user_guide/transform/sample.rst ================================================ .. currentmodule:: altair .. _user-guide-sample-transform: Sample ~~~~~~ The sample transform is one of the simplest of Altair's data transforms; it takes a single parameter ``sample``, which specifies the number of rows to randomly choose from the dataset. The resulting chart will be created using only this random subset of the data. For example, here we chart the full cars dataset alongside a sample of 100 rows: .. altair-plot:: import altair as alt from altair.datasets import data source = data.cars.url chart = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N' ).properties( width=200, height=200 ) chart | chart.transform_sample(100) Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_sample` method is built on the :class:`~SampleTransform` class, which has the following options: .. altair-object-table:: altair.SampleTransform ================================================ FILE: doc/user_guide/transform/stack.rst ================================================ .. currentmodule:: altair .. _user-guide-stack-transform: Stack ~~~~~ The stack transform allows you to compute values associated with stacked versions of encodings. For example, consider this stacked bar chart: ..
altair-plot:: import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( column='year:O', x='yield:Q', y='variety:N', color='site:N' ).properties(width=220) Implicitly, this data is being grouped and stacked, but what if you would like to access those stacked values directly? We can construct that same chart manually using the stack transform: .. altair-plot:: import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).transform_stack( stack='yield', as_=['yield_1', 'yield_2'], groupby=['year', 'variety'], sort=[alt.SortField('site', 'descending')] ).mark_bar().encode( column='year:O', x=alt.X('yield_1:Q').title('yield'), x2='yield_2:Q', y='variety:N', color='site:N', tooltip=['site', 'yield', 'variety'] ).properties(width=220) Notice that the bars are now explicitly drawn between values computed and specified within the x and x2 encodings. Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_stack` method is built on the :class:`~StackTransform` class, which has the following options: .. altair-object-table:: altair.StackTransform ================================================ FILE: doc/user_guide/transform/timeunit.rst ================================================ .. currentmodule:: altair .. _user-guide-timeunit-transform: TimeUnit ~~~~~~~~ TimeUnit transforms are used to discretize dates and times within Altair. As with the :ref:`user-guide-aggregate-transform` and :ref:`user-guide-bin-transform` discussed above, they can be defined either as part of the encoding, or as a top-level transform. These are the available time units: - ``"year"``, ``"yearquarter"``, ``"yearquartermonth"``, ``"yearmonth"``, ``"yearmonthdate"``, ``"yearmonthdatehours"``, ``"yearmonthdatehoursminutes"``, ``"yearmonthdatehoursminutesseconds"``. 
- ``"quarter"``, ``"quartermonth"`` - ``"month"``, ``"monthdate"`` - ``"date"`` (Day of month, i.e., 1 - 31) - ``"day"`` (Day of week, i.e., Monday - Friday) - ``"hours"``, ``"hoursminutes"``, ``"hoursminutesseconds"`` - ``"minutes"``, ``"minutesseconds"`` - ``"seconds"``, ``"secondsmilliseconds"`` - ``"milliseconds"`` TimeUnit Within Encoding ^^^^^^^^^^^^^^^^^^^^^^^^ Any temporal field definition can include a ``timeUnit`` argument to discretize the temporal data. For example, here we plot a dataset that consists of hourly temperature measurements in Seattle during the year 2010: .. altair-plot:: import altair as alt from altair.datasets import data temperatures = data.seattle_weather_hourly_normals.url alt.Chart(temperatures).mark_line().encode( x='date:T', y='temperature:Q' ) The plot is too busy due to the amount of data points squeezed into the short time; we can make it a bit cleaner by discretizing it, for example, by month and plotting only the mean monthly temperature: .. altair-plot:: alt.Chart(temperatures).mark_line().encode( x='month(date):T', y='mean(temperature):Q' ) Notice that by default timeUnit output is a continuous quantity; if you would instead like it to be a categorical, you can specify the ordinal (``O``) or nominal (``N``) type. This can be useful when plotting a bar chart or other discrete chart type: .. altair-plot:: alt.Chart(temperatures).mark_bar().encode( x='month(date):O', y='mean(temperature):Q' ) Multiple time units can be combined within a single plot to yield interesting views of your data; for example, here we extract both the month and the day to give a profile of Seattle temperatures through the year: ..
altair-plot:: alt.Chart(temperatures).mark_rect().encode( alt.X('date(date):O').title('day'), alt.Y('month(date):O').title('month'), color='max(temperature):Q' ).properties( title="2010 Daily High Temperatures in Seattle (C)" ) TimeUnit as a Transform ^^^^^^^^^^^^^^^^^^^^^^^ Other times it is convenient to specify a timeUnit as a top-level transform, particularly when the value may be reused. This can be done most conveniently using the :meth:`Chart.transform_timeunit` method. For example: .. altair-plot:: alt.Chart(temperatures).mark_line().encode( alt.X('month:T').axis(format='%b'), y='mean(temperature):Q' ).transform_timeunit( month='month(date)' ) Notice that because the ``timeUnit`` is not part of the encoding channel here, it is often necessary to add an axis formatter to ensure appropriate axis labels. Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_timeunit` method is built on the :class:`~TimeUnitTransform` class, which has the following options: .. altair-object-table:: altair.TimeUnitTransform ================================================ FILE: doc/user_guide/transform/window.rst ================================================ .. currentmodule:: altair .. _user-guide-window-transform: Window ~~~~~~ The window transform performs calculations over sorted groups of data objects. These calculations include ranking, lead/lag analysis, and aggregates such as cumulative sums and averages. Calculated values are written back to the input data stream, where they can be referenced by encodings. For example, consider the following cumulative frequency distribution: .. 
altair-plot:: import altair as alt from altair.datasets import data alt.Chart(data.movies.url).transform_aggregate( count='count(*)', groupby=['IMDB Rating'] ).transform_window( sort=[{'field': 'IMDB Rating'}], frame=[None, 0], cumulative_count='sum(count)', ).mark_area().encode( x='IMDB Rating:Q', y='cumulative_count:Q', ) First, we aggregate the data to count movies grouped by IMDB Rating. This creates a dataset where each row represents a unique IMDB Rating and its count. Next, we apply the window transform with a sort field definition, which indicates how data objects should be sorted within the window. Here, movies should be sorted by their IMDB rating. We then pass the frame, which indicates how many data objects before and after the current data object should be included within the window. Here, all movies up to and including the current movie should be included. Finally, we pass a window field definition, which indicates how data objects should be aggregated within the window. Here, we sum the counts to create a cumulative count. There are many aggregation functions built into Altair. 
As well as those given in :ref:`agg-func-table`, we can use the following within window field definitions: ============ ========= ========================================================================================================================================================================================================================================================================================================================= Aggregate Parameter Description ============ ========= ========================================================================================================================================================================================================================================================================================================================= row_number None Assigns each data object a consecutive row number, starting from 1. rank None Assigns a rank order value to each data object in a window, starting from 1. Peer values are assigned the same rank. Subsequent rank scores incorporate the number of prior values. For example, if the first two values tie for rank 1, the third value is assigned rank 3. dense_rank None Assigns dense rank order values to each data object in a window, starting from 1. Peer values are assigned the same rank. Subsequent rank scores do not incorporate the number of prior values. For example, if the first two values tie for rank 1, the third value is assigned rank 2. percent_rank None Assigns a percentage rank order value to each data object in a window. The percent is calculated as (rank - 1) / (group_size - 1). cume_dist None Assigns a cumulative distribution value between 0 and 1 to each data object in a window. ntile Number Assigns a quantile (e.g., percentile) value to each data object in a window. Accepts an integer parameter indicating the number of buckets to use (e.g., 100 for percentiles, 5 for quintiles). 
lag Number Assigns a value from the data object that precedes the current object by a specified number of positions. If no such object exists, assigns ``null``. Accepts an offset parameter (default ``1``) that indicates the number of positions. This operation must have a corresponding entry in the `fields` parameter array. lead Number Assigns a value from the data object that follows the current object by a specified number of positions. If no such object exists, assigns ``null``. Accepts an offset parameter (default ``1``) that indicates the number of positions. This operation must have a corresponding entry in the `fields` parameter array. first_value None Assigns a value from the first data object in the current sliding window frame. This operation must have a corresponding entry in the `fields` parameter array. last_value None Assigns a value from the last data object in the current sliding window frame. This operation must have a corresponding entry in the `fields` parameter array. nth_value Number Assigns a value from the nth data object in the current sliding window frame. If no such object exists, assigns ``null``. Requires a non-negative integer parameter that indicates the offset from the start of the window frame. This operation must have a corresponding entry in the `fields` parameter array. ============ ========= ========================================================================================================================================================================================================================================================================================================================= While an aggregate transform computes a single value that summarises all data objects, a window transform adds a new property to each data object. This new property is computed from the neighbouring data objects: that is, from the data objects delimited by the window field definition. 
For example, consider the following time series of stock prices: .. altair-plot:: import altair as alt from altair.datasets import data alt.Chart(data.stocks.url).mark_line().encode( x='date:T', y='price:Q', color='symbol:N', ) It's hard to see the overall pattern in the above example, because Google's stock price is much higher than the other stock prices. If we plot the `z-scores`_ of the stock prices, rather than the stock prices themselves, then the overall pattern becomes clearer: .. altair-plot:: import altair as alt from altair.datasets import data alt.Chart(data.stocks.url).transform_window( mean_price='mean(price)', stdev_price='stdev(price)', frame=[None, None], groupby=['symbol'], ).transform_calculate( z_score=(alt.datum.price - alt.datum.mean_price) / alt.datum.stdev_price, ).mark_line().encode( x='date:T', y='z_score:Q', color='symbol:N', ) By using two aggregation functions (``mean`` and ``stdev``) within the window transform, we are able to compute the z-scores within the calculate transform. For more information about the arguments to the window transform, see :class:`WindowTransform` and `the Vega-Lite documentation `_. Transform Options ^^^^^^^^^^^^^^^^^ The :meth:`~Chart.transform_window` method is built on the :class:`~WindowTransform` class, which has the following options: .. altair-object-table:: altair.WindowTransform .. _z-scores: https://en.wikipedia.org/w/index.php?title=Z-score ================================================ FILE: paper/paper.bib ================================================ @article{2011-d3, title = {D3: Data-Driven Documents}, author = {Bostock, Michael and Ogievetsky, Vadim and Heer, Jeffrey}, journal = {IEEE Trans. Visualization \& Comp. Graphics (Proc. 
InfoVis)}, year = {2011}, url = {http://idl.cs.washington.edu/papers/d3}, doi = {10.1109/TVCG.2011.185}, } @article{2017-vega-lite, title = {Vega-Lite: A Grammar of Interactive Graphics}, author = {Satyanarayan, Arvind and Moritz, Dominik and Wongsuphasawat, Kanit and Heer, Jeffrey}, journal = {IEEE Trans. Visualization \& Comp. Graphics (Proc. InfoVis)}, year = {2017}, url = {http://idl.cs.washington.edu/papers/vega-lite}, doi = {10.1109/TVCG.2016.2599030}, } @inproceedings{2017-voyager2, title = {Voyager 2: Augmenting Visual Analysis with Partial View Specifications}, author = {Wongsuphasawat, Kanit and Qu, Zening and Moritz, Dominik and Chang, Riley and Ouk, Felix and Anand, Anushka and Mackinlay, Jock and Howe, Bill and Heer, Jeffrey}, booktitle = {Proceedings of the 2017 {CHI Conference on Human Factors in Computing Systems}}, series = {CHI '17}, year = {2017}, isbn = {978-1-4503-4655-9}, location = {Denver, Colorado, USA}, pages = {2648--2659}, numpages = {12}, url = {http://idl.cs.washington.edu/papers/voyager2}, doi = {10.1145/3025453.3025768}, acmid = {3025768}, publisher = {ACM}, address = {New York, NY, USA}, } @article{2016-voyager, title = {Voyager: Exploratory Analysis via Faceted Browsing of Visualization Recommendations}, author = {Wongsuphasawat, Kanit and Moritz, Dominik and Anand, Anushka and Mackinlay, Jock and Howe, Bill and Heer, Jeffrey}, journal = {IEEE Trans. Visualization \& Comp. Graphics (Proc. InfoVis)}, year = {2016}, url = {http://idl.cs.washington.edu/papers/voyager}, doi = {10.1109/TVCG.2015.2467191}, } @article{2016-reactive-vega-architecture, title = {Reactive {Vega}: A Streaming Dataflow Architecture for Declarative Interactive Visualization}, author = {Satyanarayan, Arvind and Russell, Ryan and Hoffswell, Jane and Heer, Jeffrey}, journal = {IEEE Trans. Visualization \& Comp. Graphics (Proc. 
InfoVis)}, year = {2016}, url = {http://idl.cs.washington.edu/papers/reactive-vega-architecture}, doi = {10.1109/TVCG.2015.2467091}, } @book{1977-exploratory, title={Exploratory data analysis}, author={Tukey, John W}, volume={2}, year={1977}, publisher={Reading, Mass.}, doi = {10.1002/bimj.4710230408 }, } @book{2005-grammar, author = {Wilkinson, Leland}, title = {The Grammar of Graphics}, series = {Statistics and Computing}, year = {2005}, isbn = {0387245448}, publisher = {Springer-Verlag New York, Inc.}, address = {Secaucus, NJ, USA}, doi = {10.1007/0-387-28695-0}, } ================================================ FILE: paper/paper.md ================================================ --- title: 'Altair: Interactive Statistical Visualizations for Python' tags: - Python - visualization - statistics - Jupyter authors: - name: Jacob VanderPlas orcid: 0000-0002-9623-3401 affiliation: 1 - name: Brian E. Granger orcid: 0000-0002-5223-6168 affiliation: 2 - name: Jeffrey Heer orcid: 0000-0002-6175-1655 affiliation: 1 - name: Dominik Moritz orcid: 0000-0002-3110-1053 affiliation: 1 - name: Kanit Wongsuphasawat orcid: 0000-0001-7231-279X affiliation: 1 - name: Arvind Satyanarayan orcid: 0000-0001-5564-635X affiliation: 3 - name: Eitan Lees orcid: 0000-0003-0988-6015 affiliation: 4 - name: Ilia Timofeev orcid: 0000-0003-1795-943X affiliation: 5 - name: Ben Welsh orcid: 0000-0002-5200-7269 affiliation: 6 - name: Scott Sievert orcid: 0000-0002-4275-3452 affiliation: 7 affiliations: - name: University of Washington index: 1 - name: California Polytechnic State University, San Luis Obispo index: 2 - name: MIT CSAIL index: 3 - name: Florida State University index: 4 - name: TTS Consulting index: 5 - name: Los Angeles Times Data Desk index: 6 - name: University of Wisconsin--Madison index: 7 date: 07 August 2018 bibliography: paper.bib --- # Summary Altair is a declarative statistical visualization library for Python. 
Statistical visualization is a constrained subset of data visualization focused on the creation of visualizations that are helpful in statistical modeling. The constrained model of statistical visualization is usually expressed in terms of a visualization grammar [@2005-grammar] that specifies how input data is transformed and mapped to visual properties (position, color, size, etc.). Altair is based on the Vega-Lite visualization grammar [@2017-vega-lite], which allows a wide range of statistical visualizations to be expressed using a small number of grammar primitives. Vega-Lite implements a view composition algebra in conjunction with a novel grammar of interactions that allow users to specify interactive charts in a few lines of code. Vega-Lite is declarative; visualizations are specified using JSON data that follows the [Vega-Lite JSON schema](https://github.com/vega/schema). As a Python library, Altair provides an API oriented towards scientists and data scientists doing exploratory data analysis [@1977-exploratory]. Altair's Python API emits Vega-Lite JSON data, which is then rendered in a user-interface such as the Jupyter Notebook, JupyterLab, or nteract using the [Vega-Lite JavaScript library](https://vega.github.io/vega-lite/). Vega-Lite JSON is compiled to a full Vega specification [@2016-reactive-vega-architecture], which is then parsed and executed using a reactive runtime that internally makes use of D3.js [@2011-d3]. The declarative nature of the Vega-Lite visualization grammar [@2005-grammar; @2017-vega-lite], and its encoding in a formal JSON schema, provide Altair with a number of benefits. First, much of the Altair Python code and tests are generated from the Vega-Lite JSON schema, ensuring strict conformance with the Vega-Lite specification. Second, the JSON data produced by Altair and consumed by Vega-Lite provides a natural serialization and file format for statistical visualizations. 
This is leveraged by JupyterLab, which provides built-in rendering of these files. Third, the JSON data provides a clean integration point for non-programming based visualization user-interfaces such as Voyager [@2016-voyager;@2017-voyager2]. In addition to [static documentation](https://altair-viz.github.io/), Altair includes a set of Jupyter Notebooks with examples and an interactive tutorial. These notebooks can be read by anyone with only a web-browser through [binder](https://mybinder.org/). -![Seattle Weather Interactive Visualization](seattle_weather_interactive.png) The example above is an interactive Altair visualization of the weather in Seattle. The plot on the *left* shows the initial state: a scatterplot showing the temperature and dominant weather type between January and December, and a bar chart showing the counts grouped by weather type. The plot in the *middle* shows a brush that the user has drawn to focus on the summers, which are dominantly sunny. In the last plot on the *right*, the user has clicked on a bar to filter the scatterplot. These interactions are achieved through two selections: an interval selection on the scatterplot and a multi selection on the bar chart. The selections drive filters in the other plot. The code for this and other examples is in the [Altair gallery](https://altair-viz.github.io/gallery/). # Acknowledgements We thank the many contributors that created examples, wrote documentation, and reported bugs. You can find [an up-to-date list of contributors on GitHub](https://github.com/vega/altair/graphs/contributors).
# References ================================================ FILE: pyproject.toml ================================================ # this file contains: # 1 build system configuration # 2 project configuration # 3 tool configuration, for: # - hatch # - ruff # - pytest # - mypy # - pyright (experimental) [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] name = "altair" authors = [{ name = "Vega-Altair Contributors" }] dependencies = [ "typing_extensions>=4.12.0; python_version<'3.15'", "jinja2", # If you update the minimum required jsonschema version, also update it in build.yml "jsonschema>=3.0", "packaging", "narwhals>=2.4.0", ] description = "Vega-Altair: A declarative statistical visualization library for Python." readme = "README.md" keywords = [ "declarative", "statistics", "visualization", "interactive", "json", "vega-lite", ] requires-python = ">=3.10" dynamic = ["version"] license = { file = "LICENSE" } classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Console", "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Natural Language :: English", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Typing :: Typed", ] [project.urls] Documentation = "https://altair-viz.github.io" Source = "https://github.com/vega/altair" [project.optional-dependencies] save = [ "vl-convert-python>=1.8.0", ] all = [ "altair[save]", "pandas>=1.1.3", "numpy", "pyarrow>=11", "vegafusion>=2.0.3", "anywidget>=0.9.0", "altair_tiles>=0.3.0" ] dev = [ "hatch>=1.13.0", "ruff>=0.9.5", "duckdb>=1.0", "ipython", "ipykernel", "pandas>=1.1.3", "pyarrow-stubs", "pytest", "pytest-cov", "pytest-xdist[psutil]~=3.5", "mistune", "mypy", "pandas-stubs<2.3.3", "types-jsonschema", "types-setuptools", "geopandas>=0.14.3", "polars>=0.20.3", 
"taskipy>=1.14.1", "tomli>=2.2.1", ] doc = [ "sphinx", "docutils", "sphinxext_altair", "jinja2", "numpydoc", "pillow", "pydata-sphinx-theme>=0.14.1", "myst-parser", "sphinx_copybutton", "sphinx-design", "scipy", "scipy-stubs; python_version>='3.10'", ] [dependency-groups] geospatial = [ "geopandas>=0.14.3", "polars-st>=0.1.0a27 ; python_full_version>='3.10'", ] [tool.altair.vega] # Minimum/exact versions, for projects under the `vega` organization vega-datasets = "v3.2.1" # https://github.com/vega/vega-datasets vega-embed = "v7" # https://github.com/vega/vega-embed vega-lite = "v6.1.0" # https://github.com/vega/vega-lite [tool.hatch] build = { include = ["/altair"], artifacts = ["altair/jupyter/js/index.js"] } metadata = { allow-direct-references = true } version = { path = "altair/__init__.py" } [tool.hatch.envs] # https://hatch.pypa.io/latest/how-to/environment/select-installer/#enabling-uv default = { features = ["all", "dev"], installer = "uv" } doc = { features = ["all", "dev", "doc"] } [tool.hatch.envs.hatch-test] # https://hatch.pypa.io/latest/tutorials/testing/overview/ features = ["all", "dev", "doc"] matrix = [{ python = ["3.10", "3.11", "3.12", "3.13", "3.14"] }] [tool.ruff] extend-exclude = [ ".git", "__pycache__", "build", "tests/examples_arguments_syntax", "tests/examples_methods_syntax", ] indent-width = 4 line-length = 88 target-version = "py310" [tool.ruff.lint] extend-safe-fixes = [ # https://docs.astral.sh/ruff/settings/#lint_extend-safe-fixes "ANN204", # missing-return-type-special-method "C405", # unnecessary-literal-set "C419", # unnecessary-comprehension-in-call "C420", # unnecessary-dict-comprehension-for-iterable "D200", # fits-on-one-line "D301", # escape-sequence-in-docstring "D400", # ends-in-period "EM101", # raw-string-in-exception "EM102", # f-string-in-exception "PLR6201", # literal-membership "TC", # flake8-type-checking "UP006", # non-pep585-annotation "UP007", # non-pep604-annotation-union "UP008", # super-call-with-parameters 
"W291", # trailing-whitespace "W293", # blank line contains whitespace ] extend-select = [ "FURB", # refurb "PLR1733", # unnecessary-dict-index-lookup "PLR1736", # unnecessary-list-index-lookup ] ignore = [ # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules "ANN401", # any-type "D100", # undocumented-public-module "D101", # undocumented-public-class "D102", # undocumented-public-method "D103", # undocumented-public-function "D104", # undocumented-public-package "D105", # undocumented-magic-method "D107", # undocumented-public-init "D206", # indent-with-spaces "D212", # multi-line-summary-first-line (D213 is the opposite of this) "D401", # non-imperative-mood "D413", # missing-blank-line-after-last-section "E501", # line-too-long "RUF012", # mutable-class-default "RUF052", # used-dummy-variable "SIM105", # suppressible-exception (https://github.com/vega/altair/pull/3431#discussion_r1629808660) "W505", # doc-line-too-long ] mccabe.max-complexity = 10 # https://docs.astral.sh/ruff/settings/#lint_mccabe_max-complexity preview = false # https://docs.astral.sh/ruff/preview/ pydocstyle.convention = "numpy" # https://docs.astral.sh/ruff/settings/#lintpydocstyle select = [ "ANN", # flake8-annotations "B", # flake8-bugbear "C4", # flake8-comprehensions "C901", # complex-structure "D", # pydocstyle "D213", # multi-line-summary-second-line "E", # pycodestyle-error "EM", # flake8-errmsg "F", # pyflakes "FA", # flake8-future-annotations "FLY", # flynt "I001", # unsorted-imports "NPY", # numpy-specific-rules "PIE", # flake8-pie "PLC0208", # iteration-over-set "PLR0402", # manual-from-import "PLR1711", # useless-return "PLR1714", # repeated-equality-comparison "PLR5501", # collapsible-else-if "PLW0120", # useless-else-on-loop "PLW1510", # subprocess-run-without-check "PLW3301", # nested-min-max "PT", # flake8-pytest-style "PTH", # flake8-use-pathlib "RUF", # Ruff-specific rules "SIM", # flake8-simplify "TC", # flake8-type-checking "TID", # flake8-tidy-imports "UP", # 
pyupgrade "W", # pycodestyle-warning ] [tool.ruff.lint.isort] classes = ["datum", "expr"] extra-standard-library = ["typing_extensions"] known-first-party = [ "altair_tiles", "sphinxext_altair", "vegafusion", "vl_convert", ] split-on-trailing-comma = false [tool.ruff.lint.flake8-tidy-imports.banned-api] # https://docs.astral.sh/ruff/settings/#lint_flake8-tidy-imports_banned-api "narwhals.dependencies".msg = """ Import `dependencies` from `narwhals.stable.v1` instead. """ "narwhals.dtypes".msg = """ Import `dtypes` from `narwhals.stable.v1` instead. """ "narwhals.typing".msg = """ Import `typing` from `narwhals.stable.v1` instead. """ "typing.Optional".msg = """ Use `Union[T, None]` instead. `typing.Optional` is likely to be confused with `altair.typing.Optional`, \ which have a similar but different semantic meaning. See https://github.com/vega/altair/pull/3449 """ [tool.ruff.lint.per-file-ignores] "!altair/vegalite/v6/api.py" = ["ANN"] # Only enforce annotation rules on public api "tests/**/*.py" = ["C901"] # Allow complex if/elif branching during tests [tool.ruff.format] docstring-code-format = true # https://docs.astral.sh/ruff/formatter/#docstring-formatting docstring-code-line-length = 88 line-ending = "lf" [tool.taskipy.settings] cwd = "." 
[tool.taskipy.tasks] lint = "ruff check" format = "ruff format --diff --check" ruff-check = "task lint && task format" ruff-fix = "task lint && ruff format" type-check = "mypy altair tests" pytest-serial = "pytest -m \"no_xdist\" --numprocesses=1" pytest = "pytest && task pytest-serial" test = "task ruff-check && task type-check && task pytest" test-fast = "task ruff-fix && pytest -m \"not slow and not datasets_debug and not no_xdist\"" test-slow = "task ruff-fix && pytest -m \"slow and not datasets_debug and not no_xdist\"" test-datasets = "task ruff-fix && pytest tests -k test_datasets -m \"not no_xdist\" && task pytest-serial" test-min = "task ruff-check && task type-check && hatch test --python 3.10" test-all = "task ruff-check && task type-check && hatch test --all" generate-schema-wrapper = "mypy tools && python tools/generate_schema_wrapper.py && task test" update-init-file = "python tools/update_init_file.py && task ruff-fix" doc-clean = "python -c \"import tools;tools.fs.rm('doc/_build')\"" doc-clean-generated = "python -c \"import tools;tools.fs.rm('doc/user_guide/generated', 'doc/gallery')\"" doc-clean-images = "python -c \"import tools;tools.fs.rm('doc/_images')\"" doc-clean-all = "task doc-clean && task doc-clean-generated && task doc-clean-images" doc-mkdir = "python -c \"import tools;tools.fs.mkdir('doc/_images')\"" doc-build-html = "task doc-mkdir && sphinx-build -b html -d doc/_build/doctrees doc doc/_build/html" doc-clean-build = "task doc-clean-all && task doc-build-html" doc-serve = "python -m http.server --bind \"127.0.0.1\" --directory doc/_build/html 8000" doc-publish = "python tools/sync_website.py" doc-publish-clean-build = "task doc-clean-build && task doc-publish" clean = "python -c \"import tools;tools.fs.rm('dist')\"" build = "task clean && uv build" publish = "task build && uv publish" nightly-build = "python tools/generate_nightly_version.py --update && task build" nightly-release = "task nightly-build && echo 'Package built, ready 
for GitHub release'" [tool.pytest.ini_options] # Pytest does not need to search these folders for test functions. # They contain examples which are being executed by the # test_examples tests. norecursedirs = [ "tests/examples_arguments_syntax", "tests/examples_methods_syntax" ] testpaths = ["tests","altair","tools"] addopts = [ "--numprocesses=logical", "--doctest-modules", "-m not datasets_debug and not no_xdist", ] # https://docs.pytest.org/en/stable/how-to/mark.html#registering-marks markers = [ "slow: Label tests as slow (deselect with '-m \"not slow\"')", "datasets_debug: Disabled by default due to high number of requests", "no_xdist: Unsafe to run in parallel", "geospatial: Tests that require geopandas (deselect with '-m \"not geospatial\"')" ] [tool.mypy] warn_unused_ignores = true pretty = true [[tool.mypy.overrides]] module = [ "pyarrow.*", "yaml.*", "pandas.lib.*", "geopandas.*", "nbformat.*", "ipykernel.*", "ibis.*", "vegafusion.*", "scipy.*", "polars_st.*", "duckdb.*", ] ignore_missing_imports = true disable_error_code = ["import-untyped"] [[tool.mypy.overrides]] module = [ "tests/examples_arguments_syntax.*", "tests/examples_methods_syntax.*", ] disable_error_code = ["annotation-unchecked"] [tool.pyright] enableExperimentalFeatures = true pythonPlatform = "All" pythonVersion = "3.10" reportTypedDictNotRequiredAccess = "none" reportIncompatibleMethodOverride = "none" reportUnusedExpression = "none" reportUnsupportedDunderAll = "none" include = ["altair", "doc", "tests", "tools", "sphinxext"] ignore = [ "./altair/vegalite/v6/schema/channels.py", # 716 warns "./altair/vegalite/v6/schema/mixins.py", # 1001 warns "./altair/jupyter/", # Mostly untyped "./tests/test_jupyter_chart.py", # Based on untyped module "../../../**/Lib", # stdlib "../../../**/typeshed*" # typeshed-fallback ] ================================================ FILE: sphinxext/__init__.py ================================================ ================================================ 
FILE: sphinxext/altairgallery.py ================================================ from __future__ import annotations import collections import hashlib import json import random import shutil import warnings from operator import itemgetter from pathlib import Path from typing import TYPE_CHECKING, Any import jinja2 from docutils import nodes from docutils.parsers.rst import Directive from docutils.parsers.rst.directives import flag from docutils.statemachine import StringList from sphinx.util.nodes import nested_parse_with_titles from altair.utils.execeval import eval_block from tests.examples_arguments_syntax import iter_examples_arguments_syntax from tests.examples_methods_syntax import iter_examples_methods_syntax from .utils import ( create_generic_image, create_thumbnail, get_docstring_and_rest, prev_this_next, ) if TYPE_CHECKING: from docutils.nodes import Node EXAMPLE_MODULE = "altair.examples" GALLERY_TEMPLATE = jinja2.Template( """ .. This document is auto-generated by the altair-gallery extension. Do not modify directly. .. _{{ gallery_ref }}: {{ title }} {% for char in title %}-{% endfor %} This gallery contains a selection of examples of the plots Altair can create. Some may seem fairly complicated at first glance, but they are built by combining a simple set of declarative building blocks. Many draw upon sample datasets compiled by the `Vega `_ project. If you can't find the plots you are looking for here, make sure to check out the :ref:`altair-ecosystem` section, which has links to packages for making e.g. network diagrams and animations. .. note:: With the release of Altair 6, the documentation was updated to use ``from altair.datasets import data`` instead of ``from vega_datasets import data``. This change also introduced updated column names in some datasets (e.g., spaces instead of underscores). {% for grouper, group in examples %} .. _gallery-category-{{ grouper }}: {{ grouper }} {% for char in grouper %}~{% endfor %} .. 
raw:: html {% for example in group %} {{ example.title }} {% endfor %}
{% endfor %} .. toctree:: :maxdepth: 2 :caption: Examples :hidden: Gallery Tutorials <../case_studies/index> """ ) MINIGALLERY_TEMPLATE = jinja2.Template( """ .. raw:: html
{% for example in examples %} {% endfor %}
""" ) EXAMPLE_TEMPLATE = jinja2.Template( """ :orphan: :html_theme.sidebar_secondary.remove: .. This document is auto-generated by the altair-gallery extension. Do not modify directly. .. _gallery_{{ name }}: {{ docstring }} .. altair-plot:: {% if code_below %}:remove-code:{% endif %} {% if strict %}:strict:{% endif %} {{ code | indent(4) }} .. tab-set:: .. tab-item:: Method syntax :sync: method .. code:: python {{ method_code | indent(12) }} .. tab-item:: Attribute syntax :sync: attribute .. code:: python {{ code | indent(12) }} """ ) def save_example_pngs( examples: list[dict[str, Any]], image_dir: Path, make_thumbnails: bool = True ) -> None: """Save example pngs and (optionally) thumbnails.""" encoding = "utf-8" # store hashes so that we know whether images need to be generated hash_file: Path = image_dir / "_image_hashes.json" if hash_file.exists(): with hash_file.open(encoding=encoding) as f: hashes = json.load(f) else: hashes = {} for example in examples: name: str = example["name"] use_svg: bool = example["use_svg"] code = example["code"] filename = name + (".svg" if use_svg else ".png") image_file = image_dir / filename example_hash = hashlib.sha256(code.encode()).hexdigest()[:32] hashes_match = hashes.get(filename, "") == example_hash if hashes_match and image_file.exists(): print(f"-> using cached {image_file!s}") else: # the file changed or the image file does not exist. Generate it. 
class AltairMiniGalleryDirective(Directive):
    """
    Directive rendering a small grid of gallery examples.

    Options
    -------
    size : int
        Maximum number of examples shown when ``names`` is not given
        (default ``15``). When ``names`` is given, the list of names must
        contain at least ``size`` entries.
    names : str
        Comma-separated example names. When present, exactly these examples
        are rendered and ``indices``/``shuffle``/``size`` truncation are not
        applied.
    indices : str
        Whitespace-separated integer positions into the list returned by
        ``populate_examples()``.
    shuffle : flag
        Shuffle the (optionally index-selected) examples.
    seed : int
        Seed used for shuffling (default ``42``).
    titles, width
        Passed through unchanged to ``MINIGALLERY_TEMPLATE``.
    """

    has_content = False

    option_spec = {
        "size": int,
        "names": str,
        "indices": _indices,
        "shuffle": flag,
        "seed": int,
        "titles": bool,
        "width": str,
    }

    def run(self) -> list[Node]:
        """
        Build the mini-gallery nodes.

        Raises
        ------
        ValueError
            If ``names`` is given but holds fewer entries than ``size``.
        KeyError
            If a requested name does not match any known example.
        """
        size = self.options.get("size", 15)
        # ``"".split(",")`` yields ``[""]``, which is truthy; drop empty
        # entries so that an absent (or blank) ``names`` option really means
        # "no explicit names" and the index/shuffle/size branch is reachable.
        names = [
            stripped
            for name in self.options.get("names", "").split(",")
            if (stripped := name.strip())
        ]
        indices = self.options.get("indices", [])
        shuffle = "shuffle" in self.options
        seed = self.options.get("seed", 42)
        titles = self.options.get("titles", False)
        width = self.options.get("width", None)

        env = self.state.document.settings.env
        app = env.app

        gallery_dir = app.builder.config.altair_gallery_dir

        examples = populate_examples()

        if names:
            if len(names) < size:
                msg = (
                    "altair-minigallery: if names are specified, "
                    "the list must be at least as long as size."
                )
                raise ValueError(msg)
            mapping = {example["name"]: example for example in examples}
            examples = [mapping[name] for name in names]
        else:
            if indices:
                examples = [examples[i] for i in indices]
            if shuffle:
                random.seed(seed)
                random.shuffle(examples)
            if size:
                examples = examples[:size]

        include = MINIGALLERY_TEMPLATE.render(
            image_dir="/_static",
            gallery_dir=gallery_dir,
            examples=examples,
            titles=titles,
            width=width,
        )

        # parse and return documentation
        result = StringList()
        for line in include.split("\n"):
            result.append(line, "")
        node = nodes.paragraph()
        node.document = self.state.document
        nested_parse_with_titles(self.state, result, node)

        return node.children
"Interactive Charts": [], "Advanced Calculations": [], "Case Studies": [], } ) for d in examples: examples_toc[d["category"]].append(d) encoding = "utf-8" # Write the gallery index file fp = target_dir / "index.rst" fp.write_text( GALLERY_TEMPLATE.render( title=gallery_title, examples=examples_toc.items(), image_dir="/_static", gallery_ref=gallery_ref, ), encoding=encoding, ) # save the images to file save_example_pngs(examples, image_dir) # Write the individual example files for prev_ex, example, next_ex in prev_this_next(examples): if prev_ex: example["prev_ref"] = "gallery_{name}".format(**prev_ex) if next_ex: example["next_ref"] = "gallery_{name}".format(**next_ex) fp = target_dir / "".join((example["name"], ".rst")) fp.write_text(EXAMPLE_TEMPLATE.render(example), encoding=encoding) def setup(app) -> None: app.connect("builder-inited", main) app.add_css_file("altair-gallery.css") app.add_config_value("altair_gallery_dir", "gallery", "env") app.add_config_value("altair_gallery_ref", "example-gallery", "env") app.add_config_value("altair_gallery_title", "Example Gallery", "env") app.add_directive_to_domain("py", "altair-minigallery", AltairMiniGalleryDirective) ================================================ FILE: sphinxext/code_ref.py ================================================ """Sphinx extension providing formatted code blocks, referencing some function.""" from __future__ import annotations from typing import TYPE_CHECKING, Literal, cast, get_args from docutils import nodes from docutils.parsers.rst import directives from sphinx.util.docutils import SphinxDirective from sphinx.util.parsing import nested_parse_to_nodes from altair.vegalite.v6.schema._typing import VegaThemes from tools.codemod import extract_func_def, extract_func_def_embed if TYPE_CHECKING: import sys from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence from typing import Any, ClassVar, TypeAlias, TypeVar from docutils.parsers.rst.states import RSTState, 
def validate_packages(packages: Any) -> str:
    """
    Convert the ``:packages:`` option into a JSON array literal.

    The returned text is interpolated verbatim as the value of the
    ``"packages"`` key of the PyScript configuration, so it must always be a
    valid JSON list of strings.

    Parameters
    ----------
    packages
        ``None`` (option not given) or a comma-separated string of package
        names. Surrounding whitespace and optional quotes around each name
        are ignored, as are empty entries such as a trailing comma.

    Returns
    -------
    str
        ``'["altair", "vega-datasets"]'`` when ``packages`` is ``None``,
        otherwise a compact JSON array such as ``'["a","b"]'``.
    """
    import json

    if packages is None:
        return '["altair", "vega-datasets"]'
    names = [
        cleaned
        for raw in packages.split(",")
        # Every name is quoted by ``json.dumps`` below; previously only the
        # single-package case was quoted, so several packages produced
        # ``[a,b]``, which is not valid JSON.
        if (cleaned := raw.strip().strip("\"'").strip())
    ]
    return json.dumps(names, separators=(",", ":"))
_details: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/details """ def gen() -> Iterator[nodes.Node]: if {"fold", "summary"}.isdisjoint(options.keys()): yield from parsed else: summary = options.get("summary", default_summary) yield raw_html(f"

{summary}") yield from parsed yield raw_html("

") return list(gen()) def theme_names() -> tuple[Sequence[str], Sequence[str]]: names: set[str] = set(get_args(VegaThemes)) carbon = {nm for nm in names if nm.startswith("carbon")} return ["default", *sorted(names - carbon)], sorted(carbon) def option(label: str, value: str | None = None, /) -> nodes.raw: s = f"\n") def optgroup(label: str, *options: OneOrIter[nodes.raw]) -> Iterator[nodes.raw]: yield raw_html(f"\n") for opt in options: if isinstance(opt, nodes.raw): yield opt else: yield from opt yield raw_html("\n") def dropdown( id: str, label: str | None, extra_select: str, *options: OneOrIter[nodes.raw] ) -> Iterator[nodes.raw]: if label: yield raw_html(f"\n") select_text = f"\n") def pyscript( packages: str, target_div_id: str, loading_label: str, py_code: str ) -> Iterator[nodes.raw]: PY = "py" LB, RB = "{", "}" packages = f""""packages":{packages}""" yield raw_html(f"
{loading_label}
\n") yield raw_html(f"\n") def _before_code(refresh_name: str, select_id: str, target_div_id: str) -> str: INDENT = " " * 4 return ( f"from js import document\n" f"from pyscript import display\n" f"import altair as alt\n\n" f"def {refresh_name}(*args):\n" f"{INDENT}selected = document.getElementById({select_id!r}).value\n" f"{INDENT}alt.renderers.set_embed_options(theme=selected)\n" f"{INDENT}display(chart, append=False, target={target_div_id!r})\n" ) class ThemeDirective(SphinxDirective): """ Theme preview directive. Similar to ``CodeRefDirective``, but uses `PyScript`_ to access the browser. .. _PyScript: https://pyscript.net/ """ has_content: ClassVar[bool] = False required_arguments: ClassVar[int] = 1 option_spec = { "packages": validate_packages, "dropdown-label": directives.unchanged, "loading-label": directives.unchanged, "fold": directives.flag, "summary": directives.unchanged_required, } def run(self) -> Sequence[nodes.Node]: results: list[nodes.Node] = [] SELECT_ID = "embed_theme" REFRESH_NAME = "apply_embed_input" TARGET_DIV_ID = "render_altair" standard_names, carbon_names = theme_names() qual_name = self.arguments[0] module_name, func_name = qual_name.rsplit(".", 1) dropdown_label = self.options.get("dropdown-label", "Select theme:") loading_label = self.options.get("loading-label", "loading...") packages: str = self.options.get("packages", validate_packages(None)) results.append(raw_html("

\n")) results.extend( dropdown( SELECT_ID, dropdown_label, f"py-input={REFRESH_NAME!r}", (option(nm) for nm in standard_names), optgroup("Carbon", (option(nm) for nm in carbon_names)), ) ) py_code = extract_func_def_embed( module_name, func_name, before=_before_code(REFRESH_NAME, SELECT_ID, TARGET_DIV_ID), after=f"{REFRESH_NAME}()", assign_to="chart", indent=4, ) # For PyScript/Pyodide compatibility, use vega_datasets until new Altair is published py_code = py_code.replace( "from altair.datasets import data", "from vega_datasets import data" ) # vega_datasets uses underscores in column names, not spaces # Order matters: do aggregation functions first (they contain field names) py_code = py_code.replace("mean(IMDB Rating)", "mean(IMDB_Rating)") py_code = py_code.replace( "mean(Rotten Tomatoes Rating)", "mean(Rotten_Tomatoes_Rating)" ) py_code = py_code.replace('datum["IMDB Rating"]', "datum.IMDB_Rating") py_code = py_code.replace( 'datum["Rotten Tomatoes Rating"]', "datum.Rotten_Tomatoes_Rating" ) py_code = py_code.replace('datum["IMDB Votes"]', "datum.IMDB_Votes") # Field references in encodings (remaining ones) py_code = py_code.replace('"IMDB Rating"', '"IMDB_Rating"') py_code = py_code.replace( '"Rotten Tomatoes Rating"', '"Rotten_Tomatoes_Rating"' ) py_code = py_code.replace('"IMDB Votes"', '"IMDB_Votes"') py_code = py_code.replace('"Release Date:T"', '"Release_Date:T"') py_code = py_code.replace('"Release Date"', '"Release_Date"') # Restore display titles that were caught by the broad field replacement above py_code = py_code.replace('.title("Release_Date")', '.title("Release Date")') py_code = py_code.replace("'IMDB Rating'", "'IMDB_Rating'") py_code = py_code.replace( "'Rotten Tomatoes Rating'", "'Rotten_Tomatoes_Rating'" ) py_code = py_code.replace("'IMDB Votes'", "'IMDB_Votes'") py_code = py_code.replace("'Release Date'", "'Release_Date'") results.extend( pyscript(packages, TARGET_DIV_ID, loading_label, py_code=py_code) ) results.append(raw_html("

class CodeRefDirective(SphinxDirective):
    """
    Formatted code block, referencing the contents of a function definition.

    The single required argument is the dotted path of the function
    (``package.module.function``).

    Options:

        .. altair-code-ref::
            :output: [code, plot]
            :fold: flag
            :summary: str

    Examples
    --------
    Reference a function, generating a code block:

        .. altair-code-ref:: package.module.function

    Wrap the code block in a collapsible `details`_ tag:

        .. altair-code-ref:: package.module.function
            :fold:

    Override default ``"Show code"`` `details`_ summary:

        .. altair-code-ref:: package.module.function
            :fold:
            :summary: Look here!

    Use `altair-plot`_ instead of a code block:

        .. altair-code-ref:: package.module.function
            :output: plot

    .. note::
        Using `altair-plot`_ currently ignores the other options.

    .. _details:
        https://developer.mozilla.org/en-US/docs/Web/HTML/Element/details
    .. _altair-plot:
        https://github.com/vega/sphinxext-altair
    """

    # The directive takes no body content, only one argument plus options.
    has_content: ClassVar[bool] = False
    required_arguments: ClassVar[int] = 1
    # Maps each option name to the converter applied to its raw text.
    option_spec: ClassVar[dict[_Option, Callable[[str], Any]]] = {  # pyright: ignore[reportIncompatibleVariableOverride]
        "output": validate_output,
        "fold": directives.flag,
        "summary": directives.unchanged_required,
    }

    def __init__(
        self,
        name: str,
        arguments: list[str],
        options: dict[_Option, Any],
        content: StringList,
        lineno: int,
        content_offset: int,
        block_text: str,
        state: RSTState,
        state_machine: RSTStateMachine,
    ) -> None:
        """Forward every argument unchanged to the parent directive."""
        super().__init__(name, arguments, options, content, lineno, content_offset, block_text, state, state_machine)  # fmt: skip # pyright: ignore[reportArgumentType]
        # Bare annotation only: narrows the type of ``options`` for type
        # checkers, nothing is assigned here at runtime.
        self.options: dict[_Option, Any]  # pyright: ignore[reportIncompatibleVariableOverride]

    def run(self) -> Sequence[nodes.Node]:
        """
        Extract the referenced function and return the resulting nodes.

        The argument is split on its last ``.`` into module and function
        name (an argument without a dot fails the two-name unpacking with
        ``ValueError``). The extracted content is parsed into nodes and
        handed to ``maybe_details`` together with the directive options.
        """
        qual_name = self.arguments[0]
        module_name, func_name = qual_name.rsplit(".", 1)
        # ``validate_output`` already mapped the short option value, so the
        # default here uses the long form as well.
        output: _OutputLong = self.options.get("output", "code-block")
        content = extract_func_def(module_name, func_name, output=output)
        parsed = nested_parse_to_nodes(self.state, content)
        return maybe_details(parsed, self.options, default_summary="Show code")
def type_description(schema: dict[str, Any]) -> str:
    """
    Return a concise type description for the given schema.

    The keys are checked in a fixed order: ``$ref``, ``enum``, ``type``,
    ``anyOf``. An empty or non-dict schema, or one holding only a
    ``description``, is reported as ``"any"``. A schema matching none of the
    keys emits a warning and is reported as ``"--"``.
    """
    if not schema or not isinstance(schema, dict) or schema.keys() == {"description"}:
        return "any"

    if "$ref" in schema:
        # Only the last path segment of the reference names the class.
        target = schema["$ref"].split("/")[-1]
        return f":class:`{target}`"

    if "enum" in schema:
        members = ", ".join(repr(member) for member in schema["enum"])
        return f"[{members}]"

    if "type" in schema:
        kind = schema["type"]
        if isinstance(kind, list):
            joined = ", ".join(kind)
            return f"[{joined}]"
        if kind == "array":
            inner = type_description(schema.get("items", {}))
            return f"array({inner})"
        if kind == "object":
            return "dict"
        return f"`{kind}`"

    if "anyOf" in schema:
        alternatives = ", ".join(type_description(sub) for sub in schema["anyOf"])
        return f"anyOf({alternatives})"

    warnings.warn(
        f"cannot infer type for schema with keys {schema.keys()}",
        stacklevel=1,
    )
    return "--"
def select_items_from_schema(
    schema: dict[str, Any], props: list[str] | None = None
) -> Iterator[tuple[Any, Any] | tuple[str, Any]]:
    """
    Yield ``(property name, property schema)`` pairs from ``schema``.

    Parameters
    ----------
    schema
        Schema whose ``"properties"`` mapping is read (missing means empty).
    props
        Names to select, in the order they should be yielded. When ``None``
        or empty, every property is yielded in mapping order.

    Raises
    ------
    Exception
        If a requested name is not among the schema's properties.
    """
    available = schema.get("properties", {})
    if props:
        for wanted in props:
            try:
                yield wanted, available[wanted]
            except KeyError as err:
                msg = f"Can't find property: {wanted}"
                raise Exception(msg) from err
        return
    for pair in available.items():
        yield pair
Click to show table" raw_html = nodes.raw("", html, format="html") result += [raw_html] # create the table from the object result.append(prepare_schema_table(schema, cls._rootschema, props=properties)) if not dont_collapse_table: html = "
" raw_html = nodes.raw("", html, format="html") result += [raw_html] return result def setup(app) -> None: app.add_directive("altair-object-table", AltairObjectTableDirective) ================================================ FILE: sphinxext/utils.py ================================================ from __future__ import annotations import ast import hashlib import itertools import json import re from pathlib import Path from typing import Any def create_thumbnail( image_filename: Path, thumb_filename: Path, window_size: tuple[float, float] = (280, 160), ) -> None: """Create a thumbnail whose shortest dimension matches the window.""" from PIL import Image im = Image.open(image_filename) im_width, im_height = im.size width, height = window_size width_factor, height_factor = width / im_width, height / im_height if width_factor > height_factor: final_width = width final_height = int(im_height * width_factor) else: final_height = height final_width = int(im_width * height_factor) thumb = im.resize((final_width, final_height), Image.Resampling.LANCZOS) thumb.save(thumb_filename) def create_generic_image( filename: Path, shape: tuple[float, float] = (200, 300), gradient: bool = True ) -> None: """Create a generic image.""" import numpy as np from PIL import Image assert len(shape) == 2 arr = np.zeros((shape[0], shape[1], 3)) if gradient: # gradient from gray to white arr += np.linspace(128, 255, shape[1])[:, None] # pyright: ignore[reportCallIssue,reportArgumentType] im = Image.fromarray(arr.astype("uint8")) im.save(filename) SYNTAX_ERROR_DOCSTRING = """ SyntaxError =========== Example script with invalid Python syntax """ def _parse_source_file(filename: str | Path) -> tuple[ast.Module | None, str]: """ Parse source file into AST node. 
Parameters ---------- filename : str File path Returns ------- node : AST node content : utf-8 encoded string Notes ----- This function adapted from the sphinx-gallery project; license: BSD-3 https://github.com/sphinx-gallery/sphinx-gallery/ """ content = Path(filename).read_text(encoding="utf-8") # change from Windows format to UNIX for uniformity content = content.replace("\r\n", "\n") try: node = ast.parse(content) except SyntaxError: node = None return node, content def get_docstring_and_rest(filename: str | Path) -> tuple[str, str | None, str, int]: """ Separate ``filename`` content between docstring and the rest. Strongly inspired from ast.get_docstring. Parameters ---------- filename: str The path to the file containing the code to be read Returns ------- docstring: str docstring of ``filename`` category: list list of categories specified by the "# category:" comment rest: str ``filename`` content without the docstring lineno: int the line number on which the code starts Notes ----- This function adapted from the sphinx-gallery project; license: BSD-3 https://github.com/sphinx-gallery/sphinx-gallery/ """ node, content = _parse_source_file(filename) # Find the category comment find_category = re.compile(r"^#\s*category:\s*(.*)$", re.MULTILINE) match = find_category.search(content) if match is not None: category = match.groups()[0] # remove this comment from the content content = find_category.sub("", content) else: category = None lineno = 1 if node is None: return SYNTAX_ERROR_DOCSTRING, category, content, lineno if not isinstance(node, ast.Module): msg = f"This function only supports modules. You provided {node.__class__.__name__}" raise TypeError(msg) try: # In python 3.7 module knows its docstring. 
# Everything else will raise an attribute error docstring = node.docstring # pyright: ignore[reportAttributeAccessIssue] import tokenize from io import BytesIO ts = tokenize.tokenize(BytesIO(content).readline) # pyright: ignore[reportArgumentType] ds_lines = 0 # find the first string according to the tokenizer and get # it's end row for tk in ts: if tk.exact_type == 3: ds_lines, _ = tk.end break # grab the rest of the file rest = "\n".join(content.split("\n")[ds_lines:]) lineno = ds_lines + 1 except AttributeError: # this block can be removed when python 3.6 support is dropped if ( node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant) ): docstring_node = node.body[0] docstring = docstring_node.value.s # pyright: ignore[reportAttributeAccessIssue] # python2.7: Code was read in bytes needs decoding to utf-8 # unless future unicode_literals is imported in source which # make ast output unicode strings if hasattr(docstring, "decode") and not isinstance(docstring, str): docstring = docstring.decode("utf-8") # python3.8: has end_lineno lineno = getattr( docstring_node, "end_lineno", docstring_node.lineno ) # The last line of the string. # This get the content of the file after the docstring last line # Note: 'maxsplit' argument is not a keyword argument in python2 rest = content.split("\n", lineno)[-1] lineno += 1 else: docstring, rest = "", "" if not docstring: msg = ( f'Could not find docstring in file "{filename}". ' "A docstring is required for the example gallery." 
) raise ValueError(msg) return docstring, category, rest, lineno def prev_this_next( it: list[dict[str, Any]], sentinel: None = None ) -> zip[tuple[dict[str, Any] | None, dict[str, Any], dict[str, Any] | None]]: """Utility to return (prev, this, next) tuples from an iterator.""" i1, i2, i3 = itertools.tee(it, 3) next(i3, None) return zip( itertools.chain([sentinel], i1), i2, itertools.chain(i3, [sentinel]), strict=False, ) def dict_hash(dct: dict[Any, Any]) -> Any: """Return a hash of the contents of a dictionary.""" serialized = json.dumps(dct, sort_keys=True) try: m = hashlib.sha256(serialized)[:32] # pyright: ignore[reportArgumentType,reportIndexIssue] except TypeError: m = hashlib.sha256(serialized.encode())[:32] # pyright: ignore[reportIndexIssue] return m.hexdigest() ================================================ FILE: tests/__init__.py ================================================ from __future__ import annotations import pkgutil import re import sys from importlib.util import find_spec from pathlib import Path from typing import TYPE_CHECKING, overload import pytest from tests import examples_arguments_syntax, examples_methods_syntax if TYPE_CHECKING: from collections.abc import Collection, Iterator, Mapping from re import Pattern from typing import TypeAlias from _pytest.mark import ParameterSet # pyright: ignore[reportPrivateImportUsage] from _pytest.mark.structures import ( Markable, # pyright: ignore[reportPrivateImportUsage] ) MarksType: TypeAlias = ( "pytest.MarkDecorator | Collection[pytest.MarkDecorator | pytest.Mark]" ) def windows_has_tzdata() -> bool: """ From PyArrow: python/pyarrow/tests/util.py. This is the default location where tz.cpp will look for (until we make this configurable at run-time) Skip test on Windows when the tz database is not configured. See https://github.com/vega/altair/issues/3050. 
""" return (Path.home() / "Downloads" / "tzdata").exists() slow: pytest.MarkDecorator = pytest.mark.slow() """ Custom ``pytest.mark`` decorator. By default **all** tests are run. Slow tests can be **excluded** using:: >>> hatch run test-fast # doctest: +SKIP To run **only** slow tests use:: >>> hatch run test-slow # doctest: +SKIP Either script can accept ``pytest`` args:: >>> hatch run test-slow --durations=25 # doctest: +SKIP """ no_xdist: pytest.MarkDecorator = pytest.mark.no_xdist() """ Custom ``pytest.mark`` decorator. Each marked test will run **serially**, after all other selected tests. .. tip:: Use as a last resort when a test depends on manipulating global state. """ skip_requires_ipython: pytest.MarkDecorator = pytest.mark.skipif( find_spec("IPython") is None, reason="`IPython` not installed." ) """ ``pytest.mark.skipif`` decorator. Applies when `IPython`_ import would fail. .. _IPython: https://github.com/ipython/ipython """ skip_requires_vl_convert: pytest.MarkDecorator = pytest.mark.skipif( find_spec("vl_convert") is None, reason="`vl_convert` not installed." ) """ ``pytest.mark.skipif`` decorator. Applies when `vl-convert`_ import would fail. .. _vl-convert: https://github.com/vega/vl-convert """ skip_requires_vegafusion: pytest.MarkDecorator = pytest.mark.skipif( find_spec("vegafusion") is None, reason="`vegafusion` not installed." ) """ ``pytest.mark.skipif`` decorator. Applies when `vegafusion`_ import would fail. .. _vegafusion: https://github.com/vega/vegafusion """ skip_requires_scipy: pytest.MarkDecorator = pytest.mark.skipif( find_spec("scipy") is None, reason="`scipy` not installed." ) skip_requires_geopandas: pytest.MarkDecorator = pytest.mark.skipif( find_spec("geopandas") is None, reason="`geopandas` not installed." ) """ ``pytest.mark.skipif`` decorator. Applies when `geopandas`_ import would fail. .. 
@overload
def skip_requires_pyarrow(
    fn: None = ..., /, *, requires_tzdata: bool = ...
) -> pytest.MarkDecorator: ...


@overload
def skip_requires_pyarrow(
    fn: Markable, /, *, requires_tzdata: bool = ...
) -> Markable: ...


def skip_requires_pyarrow(
    fn: Markable | None = None, /, *, requires_tzdata: bool = False
) -> pytest.MarkDecorator | Markable:
    """
    ``pytest.mark.skipif`` decorator.

    Applies when `pyarrow`_ import would fail.

    With ``requires_tzdata=True`` an ``xfail`` mark is added as well, whose
    condition is "running on Windows without the timezone database".

    Can be used bare (``fn`` is the decorated object) or called without
    ``fn`` to obtain the decorator.

    https://github.com/vega/altair/issues/3050

    .. _pyarrow:
        https://pypi.org/project/pyarrow/
    """
    pyarrow_missing = find_spec("pyarrow") is None
    marker = pytest.mark.skipif(pyarrow_missing, reason="`pyarrow` not installed.")

    if requires_tzdata:
        tz_unavailable = sys.platform == "win32" and not windows_has_tzdata()
        xfail_without_tz = pytest.mark.xfail(
            tz_unavailable,
            reason="Timezone database is not installed on Windows",
        )
        # NOTE(review): the xfail decorator is called with the skipif
        # *decorator* (not a test) as its argument. Presumably both marks are
        # meant to end up on the test -- confirm against pytest's
        # ``MarkDecorator.__call__`` semantics that the skipif mark is not
        # dropped (and the xfail condition not extended) by this call.
        marker = xfail_without_tz(marker)

    if fn is None:
        return marker
    return marker(fn)
def _wrap_mark_specs(
    pattern_marks: Mapping[Pattern[str] | str, MarksType], /
) -> dict[Pattern[str], MarksType]:
    """
    Normalize the keys of ``pattern_marks`` to compiled patterns.

    String keys are compiled with ``re.compile``; keys that are already
    compiled patterns are kept as they are. Values are passed through.
    """
    compiled: dict[Pattern[str], MarksType] = {}
    for pattern, marks in pattern_marks.items():
        key = pattern if isinstance(pattern, re.Pattern) else re.compile(pattern)
        compiled[key] = marks
    return compiled


def _fill_marks(
    mark_specs: dict[Pattern[str], MarksType], string: str, /
) -> MarksType | tuple[()]:
    """
    Return the marks of the first pattern found in ``string``.

    Patterns are tried in mapping order using ``Pattern.search``; an empty
    tuple is returned when none of them matches.
    """
    for pattern, marks in mark_specs.items():
        if pattern.search(string):
            return marks
    return ()
returned: {pkgutil.get_data(pkg_name, file_name)!r}" ) raise TypeError(msg) ignore_DataFrameGroupBy: pytest.MarkDecorator = pytest.mark.filterwarnings( "ignore:DataFrameGroupBy.apply.*:DeprecationWarning" ) """ ``pytest.mark.filterwarnings`` decorator. Hides ``pandas`` warning(s):: "ignore:DataFrameGroupBy.apply.*:DeprecationWarning" """ distributed_examples: pytest.MarkDecorator = pytest.mark.parametrize( ("source", "filename"), tuple( _distributed_examples( "_", "interval_selection_map_quakes", marks={ "beckers_barley.+facet": slow, "lasagna_plot": slow, "line_chart_with_cumsum_faceted": slow, "layered_bar_chart": slow, "multiple_interactions": slow, "layered_histogram": slow, "stacked_bar_chart_with_text": slow, "bar_chart_with_labels": slow, "interactive_cross_highlight": slow, "wind_vector_map": slow, r"\.point_map\.py": slow, "line_chart_with_color_datum": slow, "deviation_ellipses": skip_requires_scipy, }, ) ), ids=id_func_str_only, ) """ ``pytest.mark.parametrize`` decorator. Provides **all** examples, using both `arguments` & `methods` syntax. The decorated test can evaluate each resulting chart via:: from altair.utils.execeval import eval_block @distributed_examples def test_some_stuff(source: Any, filename: str) -> None: chart: ChartType | None = eval_block(source) ... # Perform any assertions Notes ----- - See `#3431 comment`_ for performance benefit. - `interval_selection_map_quakes` requires `#3418`_ fix .. _#3431 comment: https://github.com/vega/altair/pull/3431#issuecomment-2168508048 .. 
_#3418: https://github.com/vega/altair/issues/3418 """ ================================================ FILE: tests/altair_theme_test.py ================================================ # ruff: noqa: E711 from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from altair.typing import ChartType def alt_theme_test() -> ChartType: import altair as alt from altair.datasets import data us_10m = data.us_10m.url unemployment = data.unemployment.url movies = data.movies.url barley = data.barley.url iowa_electricity = data.iowa_electricity.url common_data = alt.InlineData( [ {"Index": 1, "Value": 28, "Position": 1, "Category": "A"}, {"Index": 2, "Value": 55, "Position": 2, "Category": "A"}, {"Index": 3, "Value": 43, "Position": 3, "Category": "A"}, {"Index": 4, "Value": 91, "Position": 4, "Category": "A"}, {"Index": 5, "Value": 81, "Position": 5, "Category": "A"}, {"Index": 6, "Value": 53, "Position": 6, "Category": "A"}, {"Index": 7, "Value": 19, "Position": 1, "Category": "B"}, {"Index": 8, "Value": 87, "Position": 2, "Category": "B"}, {"Index": 9, "Value": 52, "Position": 3, "Category": "B"}, {"Index": 10, "Value": 48, "Position": 4, "Category": "B"}, {"Index": 11, "Value": 24, "Position": 5, "Category": "B"}, {"Index": 12, "Value": 49, "Position": 6, "Category": "B"}, {"Index": 13, "Value": 87, "Position": 1, "Category": "C"}, {"Index": 14, "Value": 66, "Position": 2, "Category": "C"}, {"Index": 15, "Value": 17, "Position": 3, "Category": "C"}, {"Index": 16, "Value": 27, "Position": 4, "Category": "C"}, {"Index": 17, "Value": 68, "Position": 5, "Category": "C"}, {"Index": 18, "Value": 16, "Position": 6, "Category": "C"}, ] ) HEIGHT_SMALL = 140 STANDARD = 180 WIDTH_GEO = int(STANDARD * 1.667) bar = ( alt.Chart(common_data, height=HEIGHT_SMALL, width=STANDARD, title="Bar") .mark_bar() .encode( x=alt.X("Index:O").axis(offset=1), y=alt.Y("Value:Q"), tooltip="Value:Q" ) .transform_filter(alt.datum["Index"] <= 9) ) line = ( 
alt.Chart(common_data, height=HEIGHT_SMALL, width=STANDARD, title="Line") .mark_line() .encode( x=alt.X("Position:O").axis(grid=False), y=alt.Y("Value:Q").axis(grid=False), color=alt.Color("Category:N").legend(None), tooltip=["Index:O", "Value:Q", "Position:O", "Category:N"], ) ) point_shape = ( alt.Chart( common_data, height=HEIGHT_SMALL, width=STANDARD, title="Point (Shape)" ) .mark_point() .encode( x=alt.X("Position:O").axis(grid=False), y=alt.Y("Value:Q").axis(grid=False), shape=alt.Shape("Category:N").legend(None), color=alt.Color("Category:N").legend(None), tooltip=["Index:O", "Value:Q", "Position:O", "Category:N"], ) ) point = ( alt.Chart(movies, height=STANDARD, width=STANDARD, title="Point") .mark_point(tooltip=True) .transform_filter(alt.datum["IMDB Rating"] != None) .transform_filter( alt.FieldRangePredicate("Release Date", [None, alt.DateTime(year=2019)]) ) .transform_joinaggregate(Average_Rating="mean(IMDB Rating)") .transform_calculate( Rating_Delta=alt.datum["IMDB Rating"] - alt.datum.Average_Rating ) .encode( x=alt.X("Release Date:T").title("Release Date"), y=alt.Y("Rating_Delta:Q").title("Rating Delta"), color=alt.Color("Rating_Delta:Q").title("Rating Delta").scale(domainMid=0), ) ) bar_stack = ( alt.Chart(barley, height=STANDARD, width=STANDARD, title="Bar (Stacked)") .mark_bar(tooltip=True) .encode( x="sum(yield):Q", y=alt.Y("variety:N"), color=alt.Color("site:N").legend(orient="bottom", columns=2), ) ) area = ( alt.Chart(iowa_electricity, height=STANDARD, width=STANDARD, title="Area") .mark_area(tooltip=True) .encode( x=alt.X("year:T").title("Year"), y=alt.Y("net_generation:Q") .title("Share of net generation") .stack("normalize") .axis(format=".0%"), color=alt.Color("source:N") .title("Electricity source") .legend(orient="bottom", columns=2), ) ) geoshape = ( alt.Chart( alt.topo_feature(us_10m, "counties"), height=STANDARD, width=WIDTH_GEO, title=alt.Title("Geoshape", subtitle="Unemployment rate per county"), ) .mark_geoshape(tooltip=True) 
# Set of the names of examples that should have SVG static images.
# This is for examples that VlConvert's PNG export does not support.
SVG_EXAMPLES: Set[str] = {"isotype_emoji"}


def iter_examples_arguments_syntax():
    """Iterate over the examples in this directory.

    Examples are yielded in sorted filename order, so the iteration order is
    the same on every platform and file system.

    Each item is a dict with the following keys:
    - "name" : the unique name of the example
    - "filename" : the full file path to the example
    - "use_svg": Flag indicating whether the static image for the
                 example should be an SVG instead of a PNG
    """
    examples_arguments_syntax_dir = os.path.abspath(os.path.dirname(__file__))
    # ``os.listdir`` returns entries in arbitrary order; sort them so callers
    # see a reproducible sequence.
    for filename in sorted(os.listdir(examples_arguments_syntax_dir)):
        name, ext = os.path.splitext(filename)
        # Skip private modules (e.g. ``__init__.py``) and non-Python files.
        if name.startswith("_") or ext != ".py":
            continue
        yield {
            "name": name,
            "filename": os.path.join(examples_arguments_syntax_dir, filename),
            "use_svg": name in SVG_EXAMPLES,
        }
""" # category: case studies import altair as alt from altair.datasets import data # Since these data are each more than 5,000 rows we'll import from the URLs airports = data.airports.url flights_airport = data.flights_airport.url states = alt.topo_feature(data.us_10m.url, feature="states") # Create pointerover selection select_city = alt.selection_point( on="pointerover", nearest=True, fields=["origin"], empty=False ) # Define which attributes to lookup from airports.csv lookup_data = alt.LookupData( airports, key="iata", fields=["state", "latitude", "longitude"] ) background = alt.Chart(states).mark_geoshape( fill="lightgray", stroke="white" ).properties( width=750, height=500 ).project("albersUsa") connections = alt.Chart(flights_airport).mark_rule(opacity=0.35).encode( latitude="latitude:Q", longitude="longitude:Q", latitude2="lat2:Q", longitude2="lon2:Q" ).transform_lookup( lookup="origin", from_=lookup_data ).transform_lookup( lookup="destination", from_=lookup_data, as_=["state", "lat2", "lon2"] ).transform_filter( select_city ) points = alt.Chart(flights_airport).mark_circle().encode( latitude="latitude:Q", longitude="longitude:Q", size=alt.Size("routes:Q", scale=alt.Scale(range=[0, 1000]), legend=None), order=alt.Order("routes:Q", sort="descending"), tooltip=["origin:N", "routes:Q"] ).transform_aggregate( routes="count()", groupby=["origin"] ).transform_lookup( lookup="origin", from_=lookup_data ).transform_filter( (alt.datum.state != "PR") & (alt.datum.state != "VI") ).add_params( select_city ) (background + connections + points).configure_view(stroke=None) ================================================ FILE: tests/examples_arguments_syntax/annual_weather_heatmap.py ================================================ """ Annual Weather Heatmap ---------------------- """ # category: tables import altair as alt from altair.datasets import data source = data.seattle_weather() alt.Chart(source, title="Daily Max Temperatures (C) in Seattle, 
WA").mark_rect().encode( x=alt.X("date(date):O", title="Day", axis=alt.Axis(format="%e", labelAngle=0)), y=alt.Y("month(date):O", title="Month"), color=alt.Color("max(temp_max)", legend=alt.Legend(title=None)), tooltip=[ alt.Tooltip("monthdate(date)", title="Date"), alt.Tooltip("max(temp_max)", title="Max Temp"), ], ).configure_view(step=13, strokeWidth=0).configure_axis(domain=False) ================================================ FILE: tests/examples_arguments_syntax/anscombe_plot.py ================================================ """ Anscombe's Quartet ------------------ `Anscombe's Quartet `_ is a famous dataset constructed by Francis Anscombe. It is made of 4 different subsets of data. Each subset has very different characteristics, even though common summary statistics such as mean and variance are identical. This example shows how to make a faceted plot, with each facet showing a different subset of the data. """ # category: case studies import altair as alt from altair.datasets import data source = data.anscombe() alt.Chart(source).mark_circle().encode( alt.X("X", scale=alt.Scale(zero=False)), alt.Y("Y", scale=alt.Scale(zero=False)), alt.Facet("Series", columns=2), ).properties( width=180, height=180, ) ================================================ FILE: tests/examples_arguments_syntax/area_chart_gradient.py ================================================ """ Area Chart with Gradient ------------------------ This example shows how to make an area chart with a gradient fill. For more information about gradient options see the Vega-Lite `Gradient documentation `_. 
""" # category: area charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).transform_filter( 'datum.symbol==="GOOG"' ).mark_area( line={'color':'darkgreen'}, color=alt.Gradient( gradient='linear', stops=[alt.GradientStop(color='white', offset=0), alt.GradientStop(color='darkgreen', offset=1)], x1=1, x2=1, y1=1, y2=0 ) ).encode( alt.X('date:T'), alt.Y('price:Q') ) ================================================ FILE: tests/examples_arguments_syntax/area_faceted.py ================================================ """ Faceted Area Chart ------------------ Multiple area subcharts, one for each company. We also show filtering out one of the companies, and sorting the companies in a custom order. """ # category: area charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).transform_filter( alt.datum.symbol != "GOOG", ).mark_area().encode( x="date:T", y="price:Q", color="symbol:N", row=alt.Row("symbol:N", sort=["MSFT", "AAPL", "IBM", "AMZN"]), ).properties(height=50, width=400) ================================================ FILE: tests/examples_arguments_syntax/bar_and_line_with_dual_axis.py ================================================ """ Bar Chart with Line on Dual Axis -------------------------------- This example shows how to combine two plots and keep their axes. For a more polished version of this chart, see :ref:`gallery_wheat_wages`. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.wheat() base = alt.Chart(source).encode(x='year:O') bar = base.mark_bar().encode(y='wheat:Q') line = base.mark_line(color='red').encode( y='wages:Q' ) (bar + line).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_faceted_compact.py ================================================ """ Compact Faceted Grid of Bar Charts ================================== A simple grid of bar charts to compare performance data, one subchart for each subset of the data. """ # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame( [ {"a": "a1", "b": "b1", "c": "x", "p": "0.14"}, {"a": "a1", "b": "b1", "c": "y", "p": "0.60"}, {"a": "a1", "b": "b1", "c": "z", "p": "0.03"}, {"a": "a1", "b": "b2", "c": "x", "p": "0.80"}, {"a": "a1", "b": "b2", "c": "y", "p": "0.38"}, {"a": "a1", "b": "b2", "c": "z", "p": "0.55"}, {"a": "a1", "b": "b3", "c": "x", "p": "0.11"}, {"a": "a1", "b": "b3", "c": "y", "p": "0.58"}, {"a": "a1", "b": "b3", "c": "z", "p": "0.79"}, {"a": "a2", "b": "b1", "c": "x", "p": "0.83"}, {"a": "a2", "b": "b1", "c": "y", "p": "0.87"}, {"a": "a2", "b": "b1", "c": "z", "p": "0.67"}, {"a": "a2", "b": "b2", "c": "x", "p": "0.97"}, {"a": "a2", "b": "b2", "c": "y", "p": "0.84"}, {"a": "a2", "b": "b2", "c": "z", "p": "0.90"}, {"a": "a2", "b": "b3", "c": "x", "p": "0.74"}, {"a": "a2", "b": "b3", "c": "y", "p": "0.64"}, {"a": "a2", "b": "b3", "c": "z", "p": "0.19"}, {"a": "a3", "b": "b1", "c": "x", "p": "0.57"}, {"a": "a3", "b": "b1", "c": "y", "p": "0.35"}, {"a": "a3", "b": "b1", "c": "z", "p": "0.49"}, {"a": "a3", "b": "b2", "c": "x", "p": "0.91"}, {"a": "a3", "b": "b2", "c": "y", "p": "0.38"}, {"a": "a3", "b": "b2", "c": "z", "p": "0.91"}, {"a": "a3", "b": "b3", "c": "x", "p": "0.99"}, {"a": "a3", "b": "b3", "c": "y", "p": "0.80"}, {"a": "a3", "b": "b3", "c": "z", "p": "0.37"}, ] ) 
alt.Chart(source, width=60, height=alt.Step(8)).mark_bar().encode( y=alt.Y("c:N", axis=None), x=alt.X("p:Q", title=None, axis=alt.Axis(format="%")), color=alt.Color( "c:N", title="settings", legend=alt.Legend(orient="bottom", titleOrient="left") ), row=alt.Row("a:N", title="Factor A", header=alt.Header(labelAngle=0)), column=alt.Column("b:N", title="Factor B"), ) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_horizontal.py ================================================ """ Horizontal Bar Chart -------------------- This example is a bar chart drawn horizontally by putting the quantitative value on the x axis. """ # category: bar charts import altair as alt from altair.datasets import data source = data.wheat() alt.Chart(source).mark_bar().encode( x='wheat:Q', y="year:O" ).properties(height=700) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_sorted.py ================================================ """ Sorted Bar Chart ================ This example shows a bar chart sorted by a calculated value. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x='sum(yield):Q', y=alt.Y('site:N', sort='-x') ) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_highlighted_bar.py ================================================ """ Bar Chart with Highlighted Bar ------------------------------ This example shows a basic bar chart with a single bar highlighted. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.wheat() # If the `year` column equals `1810` # then, set the bar color to `"orange"` # otherwise, use `"steelblue"` color = alt.when(year=1810).then(alt.value("orange")).otherwise(alt.value("steelblue")) alt.Chart(source).mark_bar().encode( x="year:O", y="wheat:Q", color=color ).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_highlighted_segment.py ================================================ """ Bar Chart with Highlighted Segment ---------------------------------- This example shows a bar chart that highlights values beyond a threshold. """ # category: bar charts import altair as alt import pandas as pd from altair.datasets import data source = data.wheat() threshold = pd.DataFrame([{"threshold": 90}]) bars = alt.Chart(source).mark_bar().encode( x="year:O", y="wheat:Q", ) highlight = alt.Chart(source).mark_bar(color="#e45755").encode( x='year:O', y='baseline:Q', y2='wheat:Q' ).transform_filter( alt.datum.wheat > 90 ).transform_calculate("baseline", "90") rule = alt.Chart(threshold).mark_rule().encode( y='threshold:Q' ) (bars + highlight + rule).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_labels.py ================================================ """ Bar Chart with Labels ===================== This example shows a basic horizontal bar chart with labels created with Altair. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.wheat() base = alt.Chart(source).encode( x='wheat', y="year:O", text='wheat' ) base.mark_bar() + base.mark_text(align='left', dx=2) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_labels_measured_luminance.py ================================================ """ Bar Chart with Labels based on Measured Luminance ================================================= This example shows a basic horizontal bar chart with labels where the measured luminance to decides if the text overlay is be colored ``black`` or ``white``. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() base = alt.Chart(source).encode( x=alt.X('sum(yield):Q', stack='zero'), y=alt.Y('site:O', sort='-x'), text=alt.Text('sum(yield):Q', format='.0f') ) bars = base.mark_bar( tooltip=alt.expr("luminance(scale('color', datum.sum_yield))") ).encode( color='sum(yield):Q' ) text = base.mark_text( align='right', dx=-3, color=alt.expr("luminance(scale('color', datum.sum_yield)) > 0.5 ? 'black' : 'white'") ) bars + text ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_mean_line.py ================================================ """ Bar Chart with Line at Mean --------------------------- This example shows the mean value overlaid on a bar chart. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.wheat() bar = alt.Chart(source).mark_bar().encode( x='year:O', y='wheat:Q' ) rule = alt.Chart(source).mark_rule(color='red').encode( y='mean(wheat):Q' ) (bar + rule).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_negatives.py ================================================ """ Bar Chart with Negative Values ============================== This example shows a bar chart with both positive and negative values. """ # category: bar charts import altair as alt from altair.datasets import data source = data.us_employment() predicate = alt.datum.nonfarm_change > 0 color = alt.when(predicate).then(alt.value("steelblue")).otherwise(alt.value("orange")) alt.Chart(source).mark_bar().encode( x="month:T", y="nonfarm_change:Q", color=color ).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_range.py ================================================ """ Bar Chart with Range ==================== This example shows a range bar chart where each bar displays information of a low and high value. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.seattle_weather() bar = alt.Chart(source).mark_bar(cornerRadius=10, height=10).encode( x=alt.X('min(temp_min):Q', scale=alt.Scale(domain=[-15, 45]), title='Temperature (°C)'), x2='max(temp_max):Q', y=alt.Y('month(date):O', title=None) ) text_min = alt.Chart(source).mark_text(align='right', dx=-5).encode( x='min(temp_min):Q', y=alt.Y('month(date):O'), text='min(temp_min):Q' ) text_max = alt.Chart(source).mark_text(align='left', dx=5).encode( x='max(temp_max):Q', y=alt.Y('month(date):O'), text='max(temp_max):Q' ) (bar + text_min + text_max).properties( title=alt.Title(text='Temperature variation by month', subtitle='Seatle weather, 2012-2015') ) ================================================ FILE: tests/examples_arguments_syntax/bar_chart_with_single_threshold.py ================================================ """ Bar Chart Highlighting Values beyond a Threshold ------------------------------------------------ This example shows a bar chart highlighting values beyond a threshold. 
""" # category: bar charts import pandas as pd import altair as alt source = pd.DataFrame({ "Day": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "Value": [55, 112, 65, 38, 80, 138, 120, 103, 395, 200, 72, 51, 112, 175, 131] }) threshold = 300 bars = alt.Chart(source).mark_bar(color="steelblue").encode( x="Day:O", y="Value:Q", ) highlight = bars.mark_bar(color="#e45755").encode( y2=alt.Y2(datum=threshold) ).transform_filter( alt.datum.Value > threshold ) rule = alt.Chart().mark_rule().encode( y=alt.Y(datum=threshold) ) label = rule.mark_text( x="width", dx=-2, align="right", baseline="bottom", text="hazardous" ) (bars + highlight + rule + label) ================================================ FILE: tests/examples_arguments_syntax/bar_faceted_stacked.py ================================================ """ Faceted Stacked Bar Chart ========================= A horizontal stacked bar chart using barley crop yield data. The chart is horizontally faceted based on the year, and vertically faceted based on variety. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( column="year:O", x="yield", y="variety", color="site", ).properties(width=220) ================================================ FILE: tests/examples_arguments_syntax/bar_rounded.py ================================================ """ Bar Chart with Rounded Edges ---------------------------- This example shows how to create a bar chart with rounded edges. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.seattle_weather() alt.Chart(source).mark_bar( cornerRadiusTopLeft=3, cornerRadiusTopRight=3 ).encode( x='month(date):O', y='count():Q', color='weather:N' ) ================================================ FILE: tests/examples_arguments_syntax/bar_with_rolling_mean.py ================================================ """ Bar Chart with Rolling Mean --------------------------- A bar chart overlaid with a rolling mean. In this example the average of values over the previous decade is displayed as a line. """ # category: bar charts import altair as alt from altair.datasets import data source = data.wheat() bar = alt.Chart(source).mark_bar().encode( x='year:O', y='wheat:Q' ) line = alt.Chart(source).mark_line(color='red').transform_window( # The field to average rolling_mean='mean(wheat)', # The number of values before and after the current value to include. frame=[-9, 0] ).encode( x='year:O', y='rolling_mean:Q' ) (bar + line).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/beckers_barley_facet.py ================================================ """ Becker's Barley Faceted Plot ---------------------------- The example demonstrates the faceted charts created by Richard Becker, William Cleveland and others in the 1990s. Using the visualization technique where each row is a different site (i.e. the chart is faceted by site), they identified an anomaly in a widely used agriculatural dataset, where the "Morris" site accidentally had the years 1931 and 1932 swapped. They named this `"The Morris Mistake." `_. 
""" # category: case studies import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source, title="The Morris Mistake").mark_point().encode( alt.X( 'yield:Q', title="Barley Yield (bushels/acre)", scale=alt.Scale(zero=False), axis=alt.Axis(grid=False) ), alt.Y( 'variety:N', title="", sort='-x', axis=alt.Axis(grid=True) ), color=alt.Color('year:N', legend=alt.Legend(title="Year")), row=alt.Row( 'site:N', title="", sort=alt.EncodingSortField(field='yield', op='sum', order='descending'), ) ).properties( height=alt.Step(20) ).configure_view(stroke="transparent") ================================================ FILE: tests/examples_arguments_syntax/beckers_barley_wrapped_facet.py ================================================ """ Becker's Barley Wrapped Facet Plot ---------------------------------- The example demonstrates the faceted charts created by Richard Becker, William Cleveland and others in the 1990s. Using the visualization technique where each row is a different site (i.e. the chart is faceted by site), they identified an anomaly in a widely used agriculatural dataset, where the "Morris" site accidentally had the years 1931 and 1932 swapped. They named this `"The Morris Mistake." `_. """ # category: case studies import altair as alt from altair.datasets import data source = data.barley.url alt.Chart(source).mark_point().encode( alt.X("median(yield):Q", scale=alt.Scale(zero=False)), y="variety:O", color="year:N", facet=alt.Facet("site:O", columns=2), ).properties( width=200, height=100, ) ================================================ FILE: tests/examples_arguments_syntax/boxplot.py ================================================ """ Boxplot with Min/Max Whiskers ------------------------------ This example shows how to make a boxplot using US Population data from 2000. 
Note that the default value of the `extent` property is 1.5, which represents the convention of extending the whiskers to the furthest points within 1.5 * IQR from the first and third quartile. """ # category: distributions import altair as alt from altair.datasets import data source = data.population.url alt.Chart(source).mark_boxplot(extent='min-max').encode( x='age:O', y='people:Q' ) ================================================ FILE: tests/examples_arguments_syntax/bubble_plot.py ================================================ """ Bubble Plot ----------------- This example shows how to make a bubble plot. """ # category: scatter plots import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_point().encode( x='Horsepower', y='Miles_per_Gallon', size='Acceleration' ) ================================================ FILE: tests/examples_arguments_syntax/bump_chart.py ================================================ """ Bump Chart ---------- This example shows a bump chart. The data is first grouped into six-month intervals using pandas. The ranks are computed by Altair using a window transform. 
""" # category: line charts import altair as alt from altair.datasets import data import pandas as pd stocks = data.stocks() source = stocks.groupby([pd.Grouper(key="date", freq="6MS"),"symbol"]).mean().reset_index() alt.Chart(source).mark_line(point = True).encode( x = alt.X("date:O", timeUnit="yearmonth", title="date"), y="rank:O", color=alt.Color("symbol:N") ).transform_window( rank="rank()", sort=[alt.SortField("price", order="descending")], groupby=["date"] ).properties( title="Bump Chart for Stock Prices", width=600, height=150, ) ================================================ FILE: tests/examples_arguments_syntax/calculate_residuals.py ================================================ """ Calculate Residuals ------------------- A dot plot showing each movie in the database, and the difference from the average movie rating. The display is sorted by year to visualize everything in sequential order. The graph is for all Movies before 2019. Adapted from `Calculate Residuals `_. """ # category: advanced calculations import altair as alt from altair.datasets import data imdb_rating = alt.datum["IMDB Rating"] source = data.movies.url chart = ( alt.Chart(source) .mark_point() .transform_filter(imdb_rating != None) .transform_filter( alt.FieldRangePredicate("Release Date", [None, 2019], timeUnit="year") ) .transform_joinaggregate(Average_Rating="mean(IMDB Rating)") .transform_calculate(Rating_Delta=imdb_rating - alt.datum.Average_Rating) .encode( x=alt.X("Release Date:T", title="Release Date"), y=alt.Y("Rating_Delta:Q", title="Rating Delta"), color=alt.Color( "Rating_Delta:Q", title="Rating Delta", scale=alt.Scale(domainMid=0), ), ) ) chart ================================================ FILE: tests/examples_arguments_syntax/candlestick_chart.py ================================================ """ Candlestick Chart ================= A candlestick chart inspired from `Protovis `_. 
This example shows the performance of the Chicago Board Options Exchange `Volatility Index `_ (VIX) in the summer of 2009. The thick bar represents the opening and closing prices, while the thin bar shows intraday high and low prices; if the index closed higher on a given day, the bars are colored green rather than red. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.ohlc() open_close_color = ( alt.when("datum.open <= datum.close") .then(alt.value("#06982d")) .otherwise(alt.value("#ae1325")) ) base = alt.Chart(source).encode( alt.X('date:T', axis=alt.Axis( format='%m/%d', labelAngle=-45, title='Date in 2009' ) ), color=open_close_color ) rule = base.mark_rule().encode( alt.Y( 'low:Q', title='Price', scale=alt.Scale(zero=False), ), alt.Y2('high:Q') ) bar = base.mark_bar().encode( alt.Y('open:Q'), alt.Y2('close:Q') ) rule + bar ================================================ FILE: tests/examples_arguments_syntax/choropleth.py ================================================ """ Choropleth Map ============== A choropleth map of unemployment rate per county in the US """ # category: maps import altair as alt from altair.datasets import data counties = alt.topo_feature(data.us_10m.url, 'counties') source = data.unemployment.url alt.Chart(counties).mark_geoshape().encode( color='rate:Q' ).transform_lookup( lookup='id', from_=alt.LookupData(source, 'id', ['rate']) ).project( type='albersUsa' ).properties( width=500, height=300 ) ================================================ FILE: tests/examples_arguments_syntax/choropleth_repeat.py ================================================ """ Repeated Choropleth Map ======================= Three choropleths representing disjoint data from the same table. 
""" # category: maps import altair as alt from altair.datasets import data states = alt.topo_feature(data.us_10m.url, 'states') source = data.population_engineers_hurricanes.url variable_list = ['population', 'engineers', 'hurricanes'] alt.Chart(states).mark_geoshape().encode( alt.Color(alt.repeat('row'), type='quantitative') ).transform_lookup( lookup='id', from_=alt.LookupData(source, 'id', variable_list) ).properties( width=500, height=300 ).project( type='albersUsa' ).repeat( row=variable_list ).resolve_scale( color='independent' ) ================================================ FILE: tests/examples_arguments_syntax/co2_concentration.py ================================================ """ Atmospheric CO2 Concentration ----------------------------- This example is a fully developed line chart that uses a window transformation. It was inspired by `Gregor Aisch's work at datawrapper `_. """ # category: case studies import altair as alt from altair.datasets import data source = data.co2_concentration.url base = alt.Chart( source, title="Carbon Dioxide in the Atmosphere" ).transform_calculate( year="year(datum.Date)" ).transform_calculate( decade="floor(datum.year / 10)" ).transform_calculate( scaled_date="(datum.year % 10) + (month(datum.Date)/12)" ).transform_window( first_date='first_value(scaled_date)', last_date='last_value(scaled_date)', sort=[{"field": "scaled_date", "order": "ascending"}], groupby=['decade'], frame=[None, None] ).transform_calculate( end="datum.first_date === datum.scaled_date ? 'first' : datum.last_date === datum.scaled_date ? 
'last' : null" ).encode( x=alt.X( "scaled_date:Q", axis=alt.Axis(title="Year into Decade", tickCount=11) ), y=alt.Y( "CO2:Q", title="CO2 concentration in ppm", scale=alt.Scale(zero=False) ) ) line = base.mark_line().encode( color=alt.Color( "decade:O", scale=alt.Scale(scheme="magma"), legend=None ) ) text = base.encode(text="year:N") start_year = text.transform_filter( alt.datum.end == 'first' ).mark_text(baseline="top") end_year = text.transform_filter( alt.datum.end == 'last' ).mark_text(baseline="bottom") (line + start_year + end_year).configure_text( align="left", dx=1, dy=3 ).properties(width=600, height=375) ================================================ FILE: tests/examples_arguments_syntax/comet_chart.py ================================================ """ Comet Chart ----------- Inspired by `Zan Armstrong's comet chart `_ this plot uses ``mark_trail`` to visualize change of grouped data over time. A more elaborate example and explanation of creating comet charts in Altair is shown in `this blogpost `_. 
""" # category: advanced calculations import altair as alt from altair.datasets import data ( alt.Chart(data.barley.url) .transform_pivot("year", value="yield", groupby=["variety", "site"]) .transform_fold(["1931", "1932"], as_=["year", "yield"]) .transform_calculate(calculate="datum['1932'] - datum['1931']", as_="delta") .mark_trail() .encode( x=alt.X('year:O', title=None), y=alt.Y('variety:N', title='Variety'), size=alt.Size('yield:Q', scale=alt.Scale(range=[0, 12]), legend=alt.Legend(values=[20, 60], title='Barley Yield (bushels/acre)')), color=alt.Color('delta:Q', scale=alt.Scale(domainMid=0), legend=alt.Legend(title='Yield Delta (%)')), tooltip=alt.Tooltip(['year:O', 'yield:Q']), column=alt.Column('site:N', title='Site') ) .configure_view(stroke=None) .configure_legend(orient='bottom', direction='horizontal') .properties(title='Barley Yield comparison between 1932 and 1931') ) ================================================ FILE: tests/examples_arguments_syntax/cumulative_count_chart.py ================================================ """ Cumulative Count Chart ---------------------- This example shows an area chart with cumulative count. Adapted from https://vega.github.io/vega-lite/examples/area_cumulative_freq.html """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).transform_window( cumulative_count="count()", sort=[{"field": "IMDB Rating"}], ).mark_area().encode( x="IMDB Rating:Q", y=alt.Y("cumulative_count:Q", stack=False) ) ================================================ FILE: tests/examples_arguments_syntax/dendrogram.py ================================================ """ Dendrogram of Hierarchical Clustering ------------------------------------- This is a dendrogram from the result of a hierarchical clustering. 
It's based on the example from https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html """ # category: case studies import pandas as pd import altair as alt import numpy as np # the variable `den` shown below is an exemplary output of `scipy.cluster.hierarchy.dendrogram` # (https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.dendrogram.html#scipy.cluster.hierarchy.dendrogram) # where the dendrogram itself is truncated such that no more than 3 levels of the dendrogram tree are shown. den = { 'dcoord': [[0.0, 0.8187388676087964, 0.8187388676087964, 0.0], [0.0, 1.105139508538779, 1.105139508538779, 0.0], [0.8187388676087964, 1.3712698320830048, 1.3712698320830048, 1.105139508538779], [0.0, 0.9099819926189507, 0.9099819926189507, 0.0], [0.0, 1.2539936203984452, 1.2539936203984452, 0.0], [0.9099819926189507, 1.9187528699821954, 1.9187528699821954, 1.2539936203984452], [1.3712698320830048, 3.828052620290243, 3.828052620290243, 1.9187528699821954], [0.0, 1.7604450194955439, 1.7604450194955439, 0.0], [0.0, 1.845844754344974, 1.845844754344974, 0.0], [1.7604450194955439, 4.847708507921838, 4.847708507921838, 1.845844754344974], [0.0, 2.8139388316471536, 2.8139388316471536, 0.0], [0.0, 2.8694176394568705, 2.8694176394568705, 0.0], [2.8139388316471536, 6.399406819518539, 6.399406819518539, 2.8694176394568705], [4.847708507921838, 12.300396052792589, 12.300396052792589, 6.399406819518539], [3.828052620290243, 32.44760699959244, 32.44760699959244, 12.300396052792589]], 'icoord': [[5.0, 5.0, 15.0, 15.0], [25.0, 25.0, 35.0, 35.0], [10.0, 10.0, 30.0, 30.0], [45.0, 45.0, 55.0, 55.0], [65.0, 65.0, 75.0, 75.0], [50.0, 50.0, 70.0, 70.0], [20.0, 20.0, 60.0, 60.0], [85.0, 85.0, 95.0, 95.0], [105.0, 105.0, 115.0, 115.0], [90.0, 90.0, 110.0, 110.0], [125.0, 125.0, 135.0, 135.0], [145.0, 145.0, 155.0, 155.0], [130.0, 130.0, 150.0, 150.0], [100.0, 100.0, 140.0, 140.0], [40.0, 40.0, 120.0, 120.0]], 'ivl': [ '(7)', '(8)', '41', '(5)', 
def get_leaf_loc(den, step=10):
    """
    Get the x locations of the dendrogram leaves.

    Parameters
    ----------
    den
        mapping with an ``"icoord"`` entry: a nested sequence holding the
        x coordinates of every link of the dendrogram.
    step
        distance between two neighbouring leaves. Defaults to 10, the
        spacing this helper previously hard-coded.

    Returns
    -------
    range
        integer positions running from the smallest to the largest x
        coordinate (inclusive), ``step`` apart.
    """
    # Convert the nested list once and reuse it for both extrema.
    icoord = np.asarray(den["icoord"])
    first = int(icoord.min())
    # ``+ 1`` keeps the largest coordinate itself inside the range.
    stop = int(icoord.max() + 1)
    return range(first, stop, step)


def get_df_coord(den):
    """
    Get coordinate dataframe.

    Parameters
    ----------
    den
        mapping with ``"icoord"`` (x coordinates) and ``"dcoord"``
        (y coordinates); each entry is a sequence of 4-element rows.

    Returns
    -------
    pandas.DataFrame
        one row per link with the columns ``xk1..xk4`` followed by
        ``yk1..yk4``.
    """
    # if you view the dendrogram as a collection of upside-down "U" shapes, then
    # we can regard the 4 corners of the upside-down "U" as points 1, 2, 3 and 4.
    cols_xk = ["xk1", "xk2", "xk3", "xk4"]
    cols_yk = ["yk1", "yk2", "yk3", "yk4"]
    # Both frames carry the default positional index, so joining on the index
    # pairs the x and y coordinates that belong to the same link.
    df_coord = pd.merge(
        pd.DataFrame(den["icoord"], columns=cols_xk),
        pd.DataFrame(den["dcoord"], columns=cols_yk),
        left_index=True,
        right_index=True,
    )
    return df_coord
All measurements are in millimeters, making them directly comparable on a shared x-axis. """ # category: distributions import altair as alt from altair.datasets import data source = data.penguins() alt.Chart(source).transform_fold( [ "Beak Length (mm)", "Beak Depth (mm)", "Flipper Length (mm)", ], as_=["Measurement Type", "value"], ).transform_density( density="value", groupby=["Measurement Type"] ).mark_area().encode( alt.X("value:Q"), alt.Y("density:Q"), alt.Row("Measurement Type:N", header=alt.Header(labelAngle=0, labelAlign="left")) ).properties( width=300, height=50 ) ================================================ FILE: tests/examples_arguments_syntax/density_stack.py ================================================ """ Stacked Density Estimates ------------------------- To plot a stacked graph of estimates, use a shared ``extent`` and a fixed number of subdivision ``steps`` to ensure that the points for each area align well. Density estimates of body mass for each penguin species are plotted in a stacked method. In addition, setting ``counts`` to true multiplies the densities by the number of data points in each group, preserving proportional differences. """ # category: distributions import altair as alt from altair.datasets import data source = data.penguins() alt.Chart(source).transform_density( density='Body Mass (g)', groupby=['Species'], extent= [2500, 6500], counts = True, steps=200 ).mark_area().encode( alt.X('value:Q').title('Body Mass (g)'), alt.Y('density:Q', stack='zero'), alt.Color('Species:N') ).properties( width=400, height=80, title='Distribution of Body Mass of Penguins' ) ================================================ FILE: tests/examples_arguments_syntax/deviation_ellipses.py ================================================ """ Confidence Interval Ellipses ---------------------------- This example shows bivariate deviation ellipses of flipper length and body mass of three penguin species. 
def confidence_region_2d(arr, conf_level=0.95, segments=50):
    """
    Calculate confidence interval ellipse.

    Parameters
    ----------
    arr
        numpy array with 2 columns
    conf_level
        lower tail probability passed to the F distribution's percent
        point function
    segments
        number of points describing the ellipse.

    Returns
    -------
    numpy.ndarray
        array with ``segments`` rows and 2 columns holding the points of
        the ellipse outline.
    """
    n_elements = len(arr)
    # Degrees of freedom of the chi-squared distribution in the **numerator**
    dfn = 2
    # Degrees of freedom of the chi-squared distribution in the **denominator**
    dfd = n_elements - 1
    # Percent point function at `conf_level` of an F continuous random variable
    quantile = F.ppf(conf_level, dfn=dfn, dfd=dfd)
    # The factor is the numerator degrees of freedom, so reuse `dfn`
    # rather than repeating the literal.
    radius = np.sqrt(dfn * quantile)
    # Evenly spaced points on the unit circle (end point excluded).
    angles = np.arange(0, segments) * 2 * np.pi / segments
    circle = np.column_stack((np.cos(angles), np.sin(angles)))
    center = np.mean(arr, axis=0)
    cov_mat = np.cov(arr, rowvar=False)
    # Stretch the unit circle with the Cholesky factor of the covariance
    # matrix, scale it by the radius and move it to the sample mean.
    return center + radius * (circle @ np.linalg.cholesky(cov_mat).T)


def grouped_confidence_regions(
    df, col_x, col_y, col_group, conf_level=0.95, segments=50
):
    """
    Calculate one confidence ellipse per group of ``df``.

    Parameters
    ----------
    df
        pandas DataFrame holding the observations
    col_x, col_y
        names of the two numeric columns spanning the plane
    col_group
        name of the column whose distinct values define the groups
    conf_level, segments
        forwarded unchanged to ``confidence_region_2d``

    Returns
    -------
    pandas.DataFrame
        columns ``order`` (position of the point along its ellipse),
        ``col_x``, ``col_y`` and ``col_group``. The frame is empty when
        ``df`` contains no groups.
    """
    cols = [col_x, col_y]
    # Group labels; their dtype is whatever the grouping column holds.
    labels: pd.Series = df[col_group]
    ellipses = []
    for group in labels.drop_duplicates():
        arr = df.loc[labels == group, cols].to_numpy(dtype=np.float64)
        ellipse = pd.DataFrame(
            confidence_region_2d(arr, conf_level=conf_level, segments=segments),
            columns=cols,
        )
        ellipse[col_group] = group
        ellipses.append(ellipse)
    if not ellipses:
        # `pd.concat` rejects an empty list; hand back an empty frame that
        # has the same columns as the regular result instead.
        return pd.DataFrame(columns=["order", *cols, col_group])
    # Every ellipse is indexed 0..segments-1; keep that index as the drawing
    # order of the points within each group.
    return pd.concat(ellipses).reset_index(names="order")
grouped_confidence_regions(source, col_x=col_x, col_y=col_y, col_group=col_group) points = alt.Chart(source).mark_circle(size=50, tooltip=True).encode( x=x, y=y, color=color ) lines = alt.Chart(ellipse).mark_line(filled=True, fillOpacity=0.2).encode( x=x, y=y, color=color, order="order" ) chart = (lines + points).properties(height=500, width=500) chart ================================================ FILE: tests/examples_arguments_syntax/distributions_and_medians_of_likert_scale_ratings.py ================================================ """ Distributions and Medians of Likert Scale Ratings ------------------------------------------------- Distributions and Medians of Likert Scale Ratings. (Figure 9 from @jhoffswell and @zcliu’s ‘Interactive Repair of Tables Extracted from PDF Documents on Mobile Devices’ – http://idl.cs.washington.edu/files/2019-InteractiveTableRepair-CHI.pdf). Adapted from `Distributions and Medians of Likert Scale Ratings `_. """ # category: distributions import altair as alt import pandas as pd medians = pd.DataFrame( [ {"name": "Identify Errors:", "median": 1.999976, "lo": "Easy", "hi": "Hard"}, {"name": "Fix Errors:", "median": 2, "lo": "Easy", "hi": "Hard"}, { "name": "Easier to Fix:", "median": 1.999969, "lo": "Toolbar", "hi": "Gesture", }, { "name": "Faster to Fix:", "median": 2.500045, "lo": "Toolbar", "hi": "Gesture", }, { "name": "Easier on Phone:", "median": 1.500022, "lo": "Toolbar", "hi": "Gesture", }, { "name": "Easier on Tablet:", "median": 2.99998, "lo": "Toolbar", "hi": "Gesture", }, { "name": "Device Preference:", "median": 4.500007, "lo": "Phone", "hi": "Tablet", }, ] ) values = pd.DataFrame( [ {"value": "P1", "name": "Participant ID", "id": "P1"}, {"value": 2, "name": "Identify Errors:", "id": "P1"}, {"value": 2, "name": "Fix Errors:", "id": "P1"}, {"value": 3, "name": "Easier to Fix:", "id": "P1"}, {"value": 4, "name": "Faster to Fix:", "id": "P1"}, {"value": 2, "name": "Easier on Phone:", "id": "P1"}, {"value": 5, "name": 
"Easier on Tablet:", "id": "P1"}, {"value": 5, "name": "Device Preference:", "id": "P1"}, {"value": 1, "name": "Tablet_First", "id": "P1"}, {"value": 1, "name": "Toolbar_First", "id": "P1"}, {"value": "P2", "name": "Participant ID", "id": "P2"}, {"value": 2, "name": "Identify Errors:", "id": "P2"}, {"value": 3, "name": "Fix Errors:", "id": "P2"}, {"value": 4, "name": "Easier to Fix:", "id": "P2"}, {"value": 5, "name": "Faster to Fix:", "id": "P2"}, {"value": 5, "name": "Easier on Phone:", "id": "P2"}, {"value": 5, "name": "Easier on Tablet:", "id": "P2"}, {"value": 5, "name": "Device Preference:", "id": "P2"}, {"value": 1, "name": "Tablet_First", "id": "P2"}, {"value": 1, "name": "Toolbar_First", "id": "P2"}, {"value": "P3", "name": "Participant ID", "id": "P3"}, {"value": 2, "name": "Identify Errors:", "id": "P3"}, {"value": 2, "name": "Fix Errors:", "id": "P3"}, {"value": 2, "name": "Easier to Fix:", "id": "P3"}, {"value": 1, "name": "Faster to Fix:", "id": "P3"}, {"value": 2, "name": "Easier on Phone:", "id": "P3"}, {"value": 1, "name": "Easier on Tablet:", "id": "P3"}, {"value": 5, "name": "Device Preference:", "id": "P3"}, {"value": 1, "name": "Tablet_First", "id": "P3"}, {"value": 0, "name": "Toolbar_First", "id": "P3"}, {"value": "P4", "name": "Participant ID", "id": "P4"}, {"value": 3, "name": "Identify Errors:", "id": "P4"}, {"value": 3, "name": "Fix Errors:", "id": "P4"}, {"value": 2, "name": "Easier to Fix:", "id": "P4"}, {"value": 2, "name": "Faster to Fix:", "id": "P4"}, {"value": 4, "name": "Easier on Phone:", "id": "P4"}, {"value": 1, "name": "Easier on Tablet:", "id": "P4"}, {"value": 5, "name": "Device Preference:", "id": "P4"}, {"value": 1, "name": "Tablet_First", "id": "P4"}, {"value": 0, "name": "Toolbar_First", "id": "P4"}, {"value": "P5", "name": "Participant ID", "id": "P5"}, {"value": 2, "name": "Identify Errors:", "id": "P5"}, {"value": 2, "name": "Fix Errors:", "id": "P5"}, {"value": 4, "name": "Easier to Fix:", "id": "P5"}, {"value": 4, 
"name": "Faster to Fix:", "id": "P5"}, {"value": 4, "name": "Easier on Phone:", "id": "P5"}, {"value": 5, "name": "Easier on Tablet:", "id": "P5"}, {"value": 5, "name": "Device Preference:", "id": "P5"}, {"value": 0, "name": "Tablet_First", "id": "P5"}, {"value": 1, "name": "Toolbar_First", "id": "P5"}, {"value": "P6", "name": "Participant ID", "id": "P6"}, {"value": 1, "name": "Identify Errors:", "id": "P6"}, {"value": 3, "name": "Fix Errors:", "id": "P6"}, {"value": 3, "name": "Easier to Fix:", "id": "P6"}, {"value": 4, "name": "Faster to Fix:", "id": "P6"}, {"value": 4, "name": "Easier on Phone:", "id": "P6"}, {"value": 4, "name": "Easier on Tablet:", "id": "P6"}, {"value": 4, "name": "Device Preference:", "id": "P6"}, {"value": 0, "name": "Tablet_First", "id": "P6"}, {"value": 1, "name": "Toolbar_First", "id": "P6"}, {"value": "P7", "name": "Participant ID", "id": "P7"}, {"value": 2, "name": "Identify Errors:", "id": "P7"}, {"value": 3, "name": "Fix Errors:", "id": "P7"}, {"value": 4, "name": "Easier to Fix:", "id": "P7"}, {"value": 5, "name": "Faster to Fix:", "id": "P7"}, {"value": 3, "name": "Easier on Phone:", "id": "P7"}, {"value": 2, "name": "Easier on Tablet:", "id": "P7"}, {"value": 4, "name": "Device Preference:", "id": "P7"}, {"value": 0, "name": "Tablet_First", "id": "P7"}, {"value": 0, "name": "Toolbar_First", "id": "P7"}, {"value": "P8", "name": "Participant ID", "id": "P8"}, {"value": 3, "name": "Identify Errors:", "id": "P8"}, {"value": 1, "name": "Fix Errors:", "id": "P8"}, {"value": 2, "name": "Easier to Fix:", "id": "P8"}, {"value": 4, "name": "Faster to Fix:", "id": "P8"}, {"value": 2, "name": "Easier on Phone:", "id": "P8"}, {"value": 5, "name": "Easier on Tablet:", "id": "P8"}, {"value": 5, "name": "Device Preference:", "id": "P8"}, {"value": 0, "name": "Tablet_First", "id": "P8"}, {"value": 0, "name": "Toolbar_First", "id": "P8"}, {"value": "P9", "name": "Participant ID", "id": "P9"}, {"value": 2, "name": "Identify Errors:", "id": "P9"}, 
{"value": 3, "name": "Fix Errors:", "id": "P9"}, {"value": 2, "name": "Easier to Fix:", "id": "P9"}, {"value": 4, "name": "Faster to Fix:", "id": "P9"}, {"value": 1, "name": "Easier on Phone:", "id": "P9"}, {"value": 4, "name": "Easier on Tablet:", "id": "P9"}, {"value": 4, "name": "Device Preference:", "id": "P9"}, {"value": 1, "name": "Tablet_First", "id": "P9"}, {"value": 1, "name": "Toolbar_First", "id": "P9"}, {"value": "P10", "name": "Participant ID", "id": "P10"}, {"value": 2, "name": "Identify Errors:", "id": "P10"}, {"value": 2, "name": "Fix Errors:", "id": "P10"}, {"value": 1, "name": "Easier to Fix:", "id": "P10"}, {"value": 1, "name": "Faster to Fix:", "id": "P10"}, {"value": 1, "name": "Easier on Phone:", "id": "P10"}, {"value": 1, "name": "Easier on Tablet:", "id": "P10"}, {"value": 5, "name": "Device Preference:", "id": "P10"}, {"value": 1, "name": "Tablet_First", "id": "P10"}, {"value": 1, "name": "Toolbar_First", "id": "P10"}, {"value": "P11", "name": "Participant ID", "id": "P11"}, {"value": 2, "name": "Identify Errors:", "id": "P11"}, {"value": 2, "name": "Fix Errors:", "id": "P11"}, {"value": 1, "name": "Easier to Fix:", "id": "P11"}, {"value": 1, "name": "Faster to Fix:", "id": "P11"}, {"value": 1, "name": "Easier on Phone:", "id": "P11"}, {"value": 1, "name": "Easier on Tablet:", "id": "P11"}, {"value": 4, "name": "Device Preference:", "id": "P11"}, {"value": 1, "name": "Tablet_First", "id": "P11"}, {"value": 0, "name": "Toolbar_First", "id": "P11"}, {"value": "P12", "name": "Participant ID", "id": "P12"}, {"value": 1, "name": "Identify Errors:", "id": "P12"}, {"value": 3, "name": "Fix Errors:", "id": "P12"}, {"value": 2, "name": "Easier to Fix:", "id": "P12"}, {"value": 3, "name": "Faster to Fix:", "id": "P12"}, {"value": 1, "name": "Easier on Phone:", "id": "P12"}, {"value": 3, "name": "Easier on Tablet:", "id": "P12"}, {"value": 3, "name": "Device Preference:", "id": "P12"}, {"value": 0, "name": "Tablet_First", "id": "P12"}, {"value": 1, 
"name": "Toolbar_First", "id": "P12"}, {"value": "P13", "name": "Participant ID", "id": "P13"}, {"value": 2, "name": "Identify Errors:", "id": "P13"}, {"value": 2, "name": "Fix Errors:", "id": "P13"}, {"value": 1, "name": "Easier to Fix:", "id": "P13"}, {"value": 1, "name": "Faster to Fix:", "id": "P13"}, {"value": 1, "name": "Easier on Phone:", "id": "P13"}, {"value": 1, "name": "Easier on Tablet:", "id": "P13"}, {"value": 5, "name": "Device Preference:", "id": "P13"}, {"value": 0, "name": "Tablet_First", "id": "P13"}, {"value": 0, "name": "Toolbar_First", "id": "P13"}, {"value": "P14", "name": "Participant ID", "id": "P14"}, {"value": 3, "name": "Identify Errors:", "id": "P14"}, {"value": 3, "name": "Fix Errors:", "id": "P14"}, {"value": 2, "name": "Easier to Fix:", "id": "P14"}, {"value": 2, "name": "Faster to Fix:", "id": "P14"}, {"value": 1, "name": "Easier on Phone:", "id": "P14"}, {"value": 1, "name": "Easier on Tablet:", "id": "P14"}, {"value": 1, "name": "Device Preference:", "id": "P14"}, {"value": 1, "name": "Tablet_First", "id": "P14"}, {"value": 1, "name": "Toolbar_First", "id": "P14"}, {"value": "P15", "name": "Participant ID", "id": "P15"}, {"value": 4, "name": "Identify Errors:", "id": "P15"}, {"value": 5, "name": "Fix Errors:", "id": "P15"}, {"value": 1, "name": "Easier to Fix:", "id": "P15"}, {"value": 1, "name": "Faster to Fix:", "id": "P15"}, {"value": 1, "name": "Easier on Phone:", "id": "P15"}, {"value": 1, "name": "Easier on Tablet:", "id": "P15"}, {"value": 5, "name": "Device Preference:", "id": "P15"}, {"value": 1, "name": "Tablet_First", "id": "P15"}, {"value": 0, "name": "Toolbar_First", "id": "P15"}, {"value": "P16", "name": "Participant ID", "id": "P16"}, {"value": 1, "name": "Identify Errors:", "id": "P16"}, {"value": 3, "name": "Fix Errors:", "id": "P16"}, {"value": 2, "name": "Easier to Fix:", "id": "P16"}, {"value": 2, "name": "Faster to Fix:", "id": "P16"}, {"value": 1, "name": "Easier on Phone:", "id": "P16"}, {"value": 4, "name": 
"Easier on Tablet:", "id": "P16"}, {"value": 5, "name": "Device Preference:", "id": "P16"}, {"value": 0, "name": "Tablet_First", "id": "P16"}, {"value": 1, "name": "Toolbar_First", "id": "P16"}, {"value": "P17", "name": "Participant ID", "id": "P17"}, {"value": 3, "name": "Identify Errors:", "id": "P17"}, {"value": 2, "name": "Fix Errors:", "id": "P17"}, {"value": 2, "name": "Easier to Fix:", "id": "P17"}, {"value": 2, "name": "Faster to Fix:", "id": "P17"}, {"value": 1, "name": "Easier on Phone:", "id": "P17"}, {"value": 3, "name": "Easier on Tablet:", "id": "P17"}, {"value": 2, "name": "Device Preference:", "id": "P17"}, {"value": 0, "name": "Tablet_First", "id": "P17"}, {"value": 0, "name": "Toolbar_First", "id": "P17"}, ] ) y_axis = alt.Y( "name", axis=alt.Axis( title=None, offset=50, labelFontWeight="bold", ticks=False, grid=True, domain=False, ), ) base = alt.Chart( medians, ).encode(y_axis) bubbles = ( alt.Chart(values) .transform_filter( (alt.datum.name != "Toolbar_First") & (alt.datum.name != "Tablet_First") & (alt.datum.name != "Participant ID") ) .mark_circle(color="#6EB4FD") .encode( alt.X( "value:Q", title=None, ), y_axis, alt.Size( "count()", legend=alt.Legend(offset=75, title="Number of ratings"), ), tooltip=[alt.Tooltip("count()", title="Number of ratings")], ) ) ticks = base.mark_tick(color="black").encode( alt.X( "median:Q", axis=alt.Axis(grid=False, values=[1, 2, 3, 4, 5], format=".0f"), scale=alt.Scale(domain=[0, 6]), ) ) texts_lo = base.mark_text(align="right", x=-5).encode(text="lo") texts_hi = base.mark_text(align="left", x=255).encode(text="hi") (bubbles + ticks + texts_lo + texts_hi).properties( title="Questionnaire Ratings", width=250, height=175 ).configure_view(stroke=None) ================================================ FILE: tests/examples_arguments_syntax/distributions_faceted_histogram.py ================================================ """ Faceted Histogram ----------------- This example shows how to make a basic faceted histogram, 
with one histogram subplot for different subsets of the data. Based off the vega-lite example: https://vega.github.io/vega-lite/examples/trellis_bar_histogram.html """ # category: distributions import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_bar().encode( alt.X("Horsepower:Q", bin=True), y="count()", row="Origin", ) ================================================ FILE: tests/examples_arguments_syntax/diverging_stacked_bar_chart.py ================================================ """ Diverging Stacked Bar Chart --------------------------- This example shows a diverging stacked bar chart for sentiments towards a set of eight questions, displayed as percentages with neutral responses straddling the 0% mark. """ # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame( [ { "question": "Question 1", "type": "Strongly disagree", "value": 24, }, { "question": "Question 1", "type": "Disagree", "value": 294, }, { "question": "Question 1", "type": "Neither agree nor disagree", "value": 594, }, { "question": "Question 1", "type": "Agree", "value": 1927, }, { "question": "Question 1", "type": "Strongly agree", "value": 376, }, { "question": "Question 2", "type": "Strongly disagree", "value": 2, }, { "question": "Question 2", "type": "Disagree", "value": 2, }, { "question": "Question 2", "type": "Neither agree nor disagree", "value": 0, }, { "question": "Question 2", "type": "Agree", "value": 7, }, { "question": "Question 2", "type": "Strongly agree", "value": 11, }, { "question": "Question 3", "type": "Strongly disagree", "value": 2, }, { "question": "Question 3", "type": "Disagree", "value": 0, }, { "question": "Question 3", "type": "Neither agree nor disagree", "value": 2, }, { "question": "Question 3", "type": "Agree", "value": 4, }, { "question": "Question 3", "type": "Strongly agree", "value": 2, }, { "question": "Question 4", "type": "Strongly disagree", "value": 0, }, { "question": "Question 
4", "type": "Disagree", "value": 2, }, { "question": "Question 4", "type": "Neither agree nor disagree", "value": 1, }, { "question": "Question 4", "type": "Agree", "value": 7, }, { "question": "Question 4", "type": "Strongly agree", "value": 6, }, { "question": "Question 5", "type": "Strongly disagree", "value": 0, }, { "question": "Question 5", "type": "Disagree", "value": 1, }, { "question": "Question 5", "type": "Neither agree nor disagree", "value": 3, }, { "question": "Question 5", "type": "Agree", "value": 16, }, { "question": "Question 5", "type": "Strongly agree", "value": 4, }, { "question": "Question 6", "type": "Strongly disagree", "value": 1, }, { "question": "Question 6", "type": "Disagree", "value": 1, }, { "question": "Question 6", "type": "Neither agree nor disagree", "value": 2, }, { "question": "Question 6", "type": "Agree", "value": 9, }, { "question": "Question 6", "type": "Strongly agree", "value": 3, }, { "question": "Question 7", "type": "Strongly disagree", "value": 0, }, { "question": "Question 7", "type": "Disagree", "value": 0, }, { "question": "Question 7", "type": "Neither agree nor disagree", "value": 1, }, { "question": "Question 7", "type": "Agree", "value": 4, }, { "question": "Question 7", "type": "Strongly agree", "value": 0, }, { "question": "Question 8", "type": "Strongly disagree", "value": 0, }, { "question": "Question 8", "type": "Disagree", "value": 0, }, { "question": "Question 8", "type": "Neither agree nor disagree", "value": 0, }, { "question": "Question 8", "type": "Agree", "value": 0, }, { "question": "Question 8", "type": "Strongly agree", "value": 2, }, ] ) # Add type_code that we can sort by source["type_code"] = source["type"].map( { "Strongly disagree": -2, "Disagree": -1, "Neither agree nor disagree": 0, "Agree": 1, "Strongly agree": 2, } ) def compute_percentages( group, ): # Set type_code as index and sort group = group.set_index("type_code").sort_index() # Compute percentage of value with question group perc 
= (group["value"] / group["value"].sum()) * 100 group["percentage"] = perc # Compute percentage end, centered on "Neither agree nor disagree" (type_code 0) # Note that we access the perc series via index which is based on 'type_code'. group["percentage_end"] = perc.cumsum() - (perc[-2] + perc[-1] + perc[0] / 2) # Compute percentage start by subtracting percent group["percentage_start"] = group["percentage_end"] - perc return group source = source.groupby("question").apply(compute_percentages).reset_index(drop=True) color_scale = alt.Scale( domain=[ "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree", ], range=["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"], ) y_axis = alt.Axis(title="Question", offset=5, ticks=False, minExtent=60, domain=False) alt.Chart(source).mark_bar().encode( x="percentage_start:Q", x2="percentage_end:Q", y=alt.Y("question:N", axis=y_axis), color=alt.Color( "type:N", legend=alt.Legend(title="Response"), scale=color_scale, ), ) ================================================ FILE: tests/examples_arguments_syntax/donut_chart.py ================================================ """ Donut Chart ----------- This example shows how to make a Donut Chart using ``mark_arc``. This is adapted from a corresponding Vega-Lite Example: `Donut Chart `_. """ # category: circular plots import pandas as pd import altair as alt source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) alt.Chart(source).mark_arc(innerRadius=50).encode( theta=alt.Theta(field="value", type="quantitative"), color=alt.Color(field="category", type="nominal"), ) ================================================ FILE: tests/examples_arguments_syntax/dot_dash_plot.py ================================================ """ Dot Dash Plot ============= How to make the dot-dash plot presented in Edward Tufte's `Visual Display of Quantitative Information `_. Based on a JavaScript implementation by `g3o2 `_. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.cars() # Configure the options common to all layers brush = alt.selection_interval() brush_origin = alt.when(brush).then("Origin") base = alt.Chart(source).add_params(brush) # Configure the points points = base.mark_point().encode( x=alt.X('Miles_per_Gallon', title=''), y=alt.Y('Horsepower', title=''), color=brush_origin.otherwise(alt.value("grey")), ) # Configure the ticks tick_axis = alt.Axis(labels=False, domain=False, ticks=False) tick_color = brush_origin.otherwise(alt.value("lightgrey")) x_ticks = base.mark_tick().encode( alt.X('Miles_per_Gallon', axis=tick_axis), alt.Y('Origin', title='', axis=tick_axis), color=tick_color ) y_ticks = base.mark_tick().encode( alt.X('Origin', title='', axis=tick_axis), alt.Y('Horsepower', axis=tick_axis), color=tick_color ) # Build the chart y_ticks | (points & x_ticks) ================================================ FILE: tests/examples_arguments_syntax/empirical_cumulative_distribution_function.py ================================================ """ Empirical Cumulative Distribution Function ------------------------------------------ This example shows an empirical cumulative distribution function. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).transform_window( ecdf="cume_dist()", sort=[{"field": "IMDB Rating"}], ).mark_line( interpolate="step-after" ).encode( x="IMDB Rating:Q", y="ecdf:Q" ) ================================================ FILE: tests/examples_arguments_syntax/errorbars_with_ci.py ================================================ """ Error Bars with Confidence Interval ====================================== This example shows how to show error bars using confidence intervals. The confidence intervals are computed internally in vega by a non-parametric `bootstrap of the mean `_. 
""" # category: uncertainties and trends import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(extent='ci').encode( x=alt.X('yield:Q', scale=alt.Scale(zero=False)), y=alt.Y('variety:N') ) points = alt.Chart(source).mark_point(filled=True, color='black').encode( x=alt.X('yield:Q', aggregate='mean'), y=alt.Y('variety:N'), ) error_bars + points ================================================ FILE: tests/examples_arguments_syntax/errorbars_with_std.py ================================================ """ Error Bars with Standard Deviation ---------------------------------- This example shows how to show error bars with standard deviation using crop yields data of different in the years of 1930s. """ # category: uncertainties and trends import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(extent='stdev').encode( x=alt.X('yield:Q', scale=alt.Scale(zero=False)), y=alt.Y('variety:N') ) points = alt.Chart(source).mark_point(filled=True, color='black').encode( x=alt.X('yield:Q', aggregate='mean'), y=alt.Y('variety:N'), ) error_bars + points ================================================ FILE: tests/examples_arguments_syntax/falkensee.py ================================================ """ Population of Falkensee from 1875 to 2014 ----------------------------------------- This example is a reproduction of the Falkensee plot found in the Vega-Lite examples. 
""" # category: case studies import altair as alt import pandas as pd source = [ {"year": "1875", "population": 1309}, {"year": "1890", "population": 1558}, {"year": "1910", "population": 4512}, {"year": "1925", "population": 8180}, {"year": "1933", "population": 15915}, {"year": "1939", "population": 24824}, {"year": "1946", "population": 28275}, {"year": "1950", "population": 29189}, {"year": "1964", "population": 29881}, {"year": "1971", "population": 26007}, {"year": "1981", "population": 24029}, {"year": "1985", "population": 23340}, {"year": "1989", "population": 22307}, {"year": "1990", "population": 22087}, {"year": "1991", "population": 22139}, {"year": "1992", "population": 22105}, {"year": "1993", "population": 22242}, {"year": "1994", "population": 22801}, {"year": "1995", "population": 24273}, {"year": "1996", "population": 25640}, {"year": "1997", "population": 27393}, {"year": "1998", "population": 29505}, {"year": "1999", "population": 32124}, {"year": "2000", "population": 33791}, {"year": "2001", "population": 35297}, {"year": "2002", "population": 36179}, {"year": "2003", "population": 36829}, {"year": "2004", "population": 37493}, {"year": "2005", "population": 38376}, {"year": "2006", "population": 39008}, {"year": "2007", "population": 39366}, {"year": "2008", "population": 39821}, {"year": "2009", "population": 40179}, {"year": "2010", "population": 40511}, {"year": "2011", "population": 40465}, {"year": "2012", "population": 40905}, {"year": "2013", "population": 41258}, {"year": "2014", "population": 41777}, ] source2 = [ {"start": "1933", "end": "1945", "event": "Nazi Rule"}, {"start": "1948", "end": "1989", "event": "GDR (East Germany)"}, ] source_df = pd.DataFrame(source) source2_df = pd.DataFrame(source2) line = alt.Chart(source_df).mark_line(color="#333").encode( x=alt.X("year:T", axis=alt.Axis(format="%Y"), title="Year"), y=alt.Y("population", title="Population"), ) point = line.mark_point(color="#333") rect = 
alt.Chart(source2_df).mark_rect().encode( x="start:T", x2="end:T", color=alt.Color("event:N", title="Event") ) (rect + line + point).properties( title="Population of Falkensee from 1875 to 2014", width=500, height=300 ) ================================================ FILE: tests/examples_arguments_syntax/filled_step_chart.py ================================================ """ Filled Step Chart ----------------- This example shows Google's stock price over time as a step chart with its area filled in and its line emphasized. """ # category: line charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_area( color="lightblue", interpolate='step-after', line=True ).encode( x='date', y='price' ).transform_filter(alt.datum.symbol == 'GOOG') ================================================ FILE: tests/examples_arguments_syntax/gantt_chart.py ================================================ """ Gantt Chart ----------------- This example shows how to make a simple Gantt chart. """ # category: advanced calculations import altair as alt import pandas as pd source = pd.DataFrame([ {"task": "A", "start": 1, "end": 3}, {"task": "B", "start": 3, "end": 8}, {"task": "C", "start": 8, "end": 10} ]) alt.Chart(source).mark_bar().encode( x='start', x2='end', y='task' ) ================================================ FILE: tests/examples_arguments_syntax/gapminder_bubble_plot.py ================================================ """ Gapminder Bubble Plot ===================== This example shows how to make a bubble plot showing the correlation between health and income for 187 countries in the world (modified from an example in Lisa Charlotte Rost's blog post `'One Chart, Twelve Charting Libraries' `_. 
""" # category: case studies import altair as alt from altair.datasets import data source = data.gapminder_health_income.url alt.Chart(source).mark_circle().encode( alt.X('income:Q', scale=alt.Scale(type='log')), alt.Y('health:Q', scale=alt.Scale(zero=False)), size='population:Q' ) ================================================ FILE: tests/examples_arguments_syntax/groupby-map.py ================================================ """ Grouped Points with Proportional Symbols Map ============================================ This is a layered geographic visualization that groups points by state. """ # category: maps import altair as alt from altair.datasets import data airports = data.airports.url states = alt.topo_feature(data.us_10m.url, feature='states') # US states background background = alt.Chart(states).mark_geoshape( fill='lightgray', stroke='white' ).properties( width=500, height=300 ).project('albersUsa') # Airports grouped by state points = alt.Chart(airports).transform_aggregate( latitude='mean(latitude)', longitude='mean(longitude)', count='count()', groupby=['state'] ).mark_circle().encode( longitude='longitude:Q', latitude='latitude:Q', size=alt.Size('count:Q', title='Number of Airports'), color=alt.value('steelblue'), tooltip=['state:N','count:Q'] ).properties( title='Number of airports in US' ) background + points ================================================ FILE: tests/examples_arguments_syntax/grouped_bar_chart.py ================================================ """ Grouped Bar Chart ----------------- This example shows a grouped bar chart. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x='year:O', y='sum(yield):Q', color='year:N', column='site:N' ) ================================================ FILE: tests/examples_arguments_syntax/grouped_bar_chart2.py ================================================ """ Grouped Bar Chart with xOffset ------------------------------ Like :ref:`gallery_grouped_bar_chart`, this example shows a grouped bar chart. Whereas :ref:`gallery_grouped_bar_chart` used the ``column`` encoding channel, this example uses the ``xOffset`` encoding channel. This is adapted from a corresponding Vega-Lite Example: `Grouped Bar Chart `_. """ # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame({"Category":list("AAABBBCCC"), "Group":list("xyzxyzxyz"), "Value":[0.1, 0.6, 0.9, 0.7, 0.2, 1.1, 0.6, 0.1, 0.2]}) alt.Chart(source).mark_bar().encode( x="Category:N", y="Value:Q", xOffset="Group:N", color="Group:N" ) ================================================ FILE: tests/examples_arguments_syntax/grouped_bar_chart_horizontal.py ================================================ """ Horizontal Grouped Bar Chart ---------------------------- This example shows a horizontal grouped bar chart. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x='sum(yield):Q', y='year:O', color='year:N', row='site:N' ) ================================================ FILE: tests/examples_arguments_syntax/grouped_bar_chart_overlapping_bars.py ================================================ """ Grouped Bar Chart with xOffset and overlapping bars --------------------------------------------------- Like :ref:`gallery_grouped_bar_chart2`, this example shows a grouped bar chart using the ``xOffset`` encoding channel, but in this example the bars are partly overlapping within each group. 
""" # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame( { "category": list("AABBCC"), "group": list("xyxyxy"), "value": [0.1, 0.6, 0.7, 0.2, 0.6, 0.1], } ) base = alt.Chart(source, width=alt.Step(12)).encode( x="category:N", y="value:Q", xOffset=alt.XOffset("group:N", scale=alt.Scale(paddingOuter=0.5)), ) alt.layer( base.mark_bar(size=20, stroke="white", fillOpacity=0.9).encode(fill="group:N"), base.mark_text(dy=-5).encode(text="value:Q"), ) ================================================ FILE: tests/examples_arguments_syntax/grouped_bar_chart_with_error_bars.py ================================================ """ Grouped Bar Chart with Error Bars --------------------------------- This example shows a grouped bar chart with error bars. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() bars = alt.Chart().mark_bar().encode( x='year:O', y=alt.Y('mean(yield):Q', title='Mean Yield'), color='year:N', ) error_bars = alt.Chart().mark_errorbar(extent='ci').encode( x='year:O', y='yield:Q' ) alt.layer(bars, error_bars, data=source).facet( column='site:N' ) ================================================ FILE: tests/examples_arguments_syntax/heat_lane.py ================================================ """ Heat Lane Chart --------------- This example shows how to make an alternative form of a histogram `designed at Google `_ with the goal of increasing accessibility. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.cars.url chart = alt.Chart(source, title="Car horsepower", height=100, width=300).encode( x=alt.X( "bin_Horsepower_start:Q", title="Horsepower", axis=alt.Axis(grid=False) ), x2="bin_Horsepower_end:Q", y=alt.Y("y:O", axis=None), y2="y2", ).transform_bin( ["bin_Horsepower_start", "bin_Horsepower_end"], field='Horsepower' ).transform_aggregate( count='count()', groupby=["bin_Horsepower_start", "bin_Horsepower_end"] ).transform_bin( ["bin_count_start", "bin_count_end"], field='count' ).transform_calculate( y="datum.bin_count_end/2", y2="-datum.bin_count_end/2", ).transform_joinaggregate( max_bin_count_end="max(bin_count_end)", ) layer1 = chart.mark_bar(xOffset=1, x2Offset=-1, cornerRadius=3).encode( color=alt.Color("max_bin_count_end:O", scale=alt.Scale(scheme="lighttealblue"), title="Number of models") ) layer2 = chart.mark_bar(xOffset=1, x2Offset=-1, yOffset=-3, y2Offset=3).encode( color=alt.Color("bin_count_end:O", title="Number of models") ) layer1 + layer2 ================================================ FILE: tests/examples_arguments_syntax/hexbins.py ================================================ """ Hexbin Chart ------------ This example shows a hexbin chart. 
""" # category: tables import altair as alt from altair.datasets import data source = data.seattle_weather() # Size of the hexbins size = 15 # Count of distinct x features xFeaturesCount = 12 # Count of distinct y features yFeaturesCount = 7 # Name of the x field xField = 'date' # Name of the y field yField = 'date' # the shape of a hexagon hexagon = "M0,-2.3094010768L2,-1.1547005384 2,1.1547005384 0,2.3094010768 -2,1.1547005384 -2,-1.1547005384Z" alt.Chart(source).mark_point(size=size**2, shape=hexagon).encode( x=alt.X('xFeaturePos:Q', axis=alt.Axis(title='Month', grid=False, tickOpacity=0, domainOpacity=0)), y=alt.Y('day(' + yField + '):O', axis=alt.Axis(title='Weekday', labelPadding=20, tickOpacity=0, domainOpacity=0)), stroke=alt.value('black'), strokeWidth=alt.value(0.2), fill=alt.Fill('mean(temp_max):Q', scale=alt.Scale(scheme='darkblue')), tooltip=['month(' + xField + '):O', 'day(' + yField + '):O', 'mean(temp_max):Q'] ).transform_calculate( # This field is required for the hexagonal X-Offset xFeaturePos='(day(datum.' + yField + ') % 2) / 2 + month(datum.' + xField + ')' ).properties( # Exact scaling factors to make the hexbins fit width=size * xFeaturesCount * 2, height=size * yFeaturesCount * 1.7320508076, # 1.7320508076 is approx. sin(60°)*2 ).configure_view( strokeWidth=0 ) ================================================ FILE: tests/examples_arguments_syntax/histogram_gradient_color.py ================================================ """ Histogram with Gradient Color ----------------------------- This example shows how to make a histogram with gradient color. The low-high IMDB rating is represented with the color scheme `pinkyellowgreen`. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_bar().encode( alt.X("IMDB Rating:Q", bin=alt.Bin(maxbins=20), scale=alt.Scale(domain=[1, 10]) ), alt.Y('count()'), alt.Color("IMDB Rating:Q", bin=alt.Bin(maxbins=20), scale=alt.Scale(scheme='pinkyellowgreen') ) ) ================================================ FILE: tests/examples_arguments_syntax/histogram_heatmap.py ================================================ """ 2D Histogram Heatmap -------------------- This example shows how to make a heatmap from binned quantitative data. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_rect().encode( alt.X('IMDB Rating:Q', bin=alt.Bin(maxbins=60)), alt.Y('Rotten Tomatoes Rating:Q', bin=alt.Bin(maxbins=40)), alt.Color('count():Q', scale=alt.Scale(scheme='greenblue')) ) ================================================ FILE: tests/examples_arguments_syntax/histogram_responsive.py ================================================ """ Histogram with Responsive Bins ------------------------------ This shows an example of a histogram with bins that are responsive to a selection domain. Click and drag on the bottom panel to see the bins change on the top panel. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.flights_5k.url brush = alt.selection_interval(encodings=['x']) base = alt.Chart(source).transform_calculate( time="hours(datum.date) + minutes(datum.date) / 60" ).mark_bar().encode( y='count():Q' ).properties( width=600, height=100 ) alt.vconcat( base.encode( alt.X('time:Q', bin=alt.Bin(maxbins=30, extent=brush), scale=alt.Scale(domain=brush) ) ), base.encode( alt.X('time:Q', bin=alt.Bin(maxbins=30)), ).add_params(brush) ) ================================================ FILE: tests/examples_arguments_syntax/histogram_scatterplot.py ================================================ """ 2D Histogram Scatter Plot ------------------------- This example shows how to make a 2d histogram scatter plot. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_circle().encode( alt.X('IMDB Rating:Q', bin=True), alt.Y('Rotten Tomatoes Rating:Q', bin=True), size='count()' ) ================================================ FILE: tests/examples_arguments_syntax/histogram_with_a_global_mean_overlay.py ================================================ """ Histogram with a Global Mean Overlay ------------------------------------ This example shows a histogram with a global mean overlay. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url base = alt.Chart(source) bar = base.mark_bar().encode( x=alt.X('IMDB Rating:Q', bin=True, axis=None), y='count()' ) rule = base.mark_rule(color='red').encode( x='mean(IMDB Rating):Q', size=alt.value(5) ) bar + rule ================================================ FILE: tests/examples_arguments_syntax/horizon_graph.py ================================================ """ Horizon Graph ------------- This example shows how to make a Horizon Graph with 2 layers. 
(See https://idl.cs.washington.edu/papers/horizon/ for more details on Horizon Graphs.) """ # category: area charts import altair as alt import pandas as pd source = pd.DataFrame([ {"x": 1, "y": 28}, {"x": 2, "y": 55}, {"x": 3, "y": 43}, {"x": 4, "y": 91}, {"x": 5, "y": 81}, {"x": 6, "y": 53}, {"x": 7, "y": 19}, {"x": 8, "y": 87}, {"x": 9, "y": 52}, {"x": 10, "y": 48}, {"x": 11, "y": 24}, {"x": 12, "y": 49}, {"x": 13, "y": 87}, {"x": 14, "y": 66}, {"x": 15, "y": 17}, {"x": 16, "y": 27}, {"x": 17, "y": 68}, {"x": 18, "y": 16}, {"x": 19, "y": 49}, {"x": 20, "y": 15} ]) area1 = alt.Chart(source).mark_area( clip=True, interpolate='monotone' ).encode( alt.X('x', scale=alt.Scale(zero=False, nice=False)), alt.Y('y', scale=alt.Scale(domain=[0, 50]), title='y'), opacity=alt.value(0.6) ).properties( width=500, height=75 ) area2 = area1.encode( alt.Y('ny:Q', scale=alt.Scale(domain=[0, 50])) ).transform_calculate( "ny", alt.datum.y - 50 ) area1 + area2 ================================================ FILE: tests/examples_arguments_syntax/horizontal_stacked_bar_chart.py ================================================ """ Horizontal Stacked Bar Chart ============================ This is an example of a horizontal stacked bar chart using data which contains crop yields over different regions and different years in the 1930s. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x='sum(yield)', y='variety', color='site' ) ================================================ FILE: tests/examples_arguments_syntax/interactive_aggregation.py ================================================ """ Interactive Chart with Aggregation ================================== This example shows an interactive chart where a range slider controls a threshold, drawn as a rule: the datapoints to the left of the rule are aggregated and those to the right are drawn as is.
The ability to slide back and forth may help you understand how the visualization represents the aggregation. Adapted from an example by @dwootton. """ # category: interactive charts import altair as alt from altair.datasets import data source = data.movies.url slider = alt.binding_range(min=0, max=10, step=0.1) threshold = alt.param(name="threshold", value=5, bind=slider) alt.layer( alt.Chart(source).mark_circle().encode( x=alt.X("IMDB Rating:Q", title="IMDB Rating"), y=alt.Y("Rotten Tomatoes Rating:Q", title="Rotten Tomatoes Rating") ).transform_filter( alt.datum["IMDB Rating"] >= threshold ), alt.Chart(source).mark_circle().encode( x=alt.X("IMDB Rating:Q", bin=alt.Bin(maxbins=10)), y=alt.Y("Rotten Tomatoes Rating:Q", bin=alt.Bin(maxbins=10)), size=alt.Size("count():Q", scale=alt.Scale(domain=[0,160])) ).transform_filter( alt.datum["IMDB Rating"] < threshold ), alt.Chart().mark_rule(color="gray").encode( strokeWidth=alt.StrokeWidth(value=6), x=alt.X(datum=alt.expr(threshold.name), type="quantitative") ) ).add_params(threshold) ================================================ FILE: tests/examples_arguments_syntax/interactive_bar_select_highlight.py ================================================ """ Bar Chart with Highlighting on Hover and Selection on Click ----------------------------------------------------------- This example shows a bar chart with highlighting on hover and selecting on click. (Inspired by Tableau's interaction style.)
Based on https://vega.github.io/vega-lite/examples/interactive_bar_select_highlight.html """ # category: interactive charts import altair as alt source = { "values": [ {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43}, {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53}, {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52}, ] } select = alt.selection_point(name="select", on="click") highlight = alt.selection_point(name="highlight", on="pointerover", empty=False) stroke_width = ( alt.when(select).then(alt.value(2, empty=False)) .when(highlight).then(alt.value(1)) .otherwise(alt.value(0)) ) alt.Chart( source, height=200, config=alt.Config(scale=alt.ScaleConfig(bandPaddingInner=0.2)), ).mark_bar(fill="#4C78A8", stroke="black", cursor="pointer").encode( x="a:O", y="b:Q", fillOpacity=alt.when(select).then(alt.value(1)).otherwise(alt.value(0.3)), strokeWidth=stroke_width, ).add_params(select, highlight) ================================================ FILE: tests/examples_arguments_syntax/interactive_brush.py ================================================ """ Interactive Rectangular Brush ============================= This example shows how to add a simple rectangular brush to a scatter plot. By clicking and dragging on the plot, you can highlight points within the range. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_interval() alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Cylinders:O").otherwise(alt.value("grey")), ).add_params(brush) ================================================ FILE: tests/examples_arguments_syntax/interactive_column_selection.py ================================================ """ Interactive Selection of Columns ================================ This example shows how columns can be selected interactively by accessing the values from selector widgets, and then compute the difference of the selected columns. It also illustrates how to use `indexof` to filter columns based on active selection values. """ # category: interactive charts import pandas as pd import numpy as np import altair as alt # Create timeseries data rng = np.random.default_rng(905) ex_ts = pd.DataFrame( rng.random((10, 4)), columns=['a', 'b', 'c', 'd'], ).assign( date=pd.date_range( start=pd.to_datetime('2022-02-22')-pd.Timedelta(9, unit='D'), end=pd.to_datetime('2022-02-22')).strftime('%Y-%m-%d'), ) # Create heatmap with selection select_x = alt.selection_point(fields=['level_0'], name='select_x', value='b') select_y = alt.selection_point(fields=['level_1'], name='select_y', value='d') heatmap = alt.Chart( ex_ts.drop(columns='date').corr().stack().reset_index().rename(columns={0: 'correlation'}), title='Click a tile to compare timeseries', height=250, width=250, ).mark_rect().encode( alt.X('level_0', title=None), alt.Y('level_1', title=None), alt.Color('correlation', scale=alt.Scale(domain=[-1, 1], scheme='blueorange')), opacity=alt.when(select_x, select_y).then(alt.value(1)).otherwise(alt.value(0.4)), ).add_params( select_x, select_y ) # Create chart with individual lines/timeseries base = alt.Chart( ex_ts.melt( id_vars='date', var_name='category', value_name='value', ), height=100, width=300, 
title='Individual timeseries', ) lines = base.transform_filter( # If the category is not in the selected values, the returned index is -1 'indexof(datum.category, select_x.level_0) !== -1' '| indexof(datum.category, select_y.level_1) !== -1' ).mark_line().encode( alt.X('date:T', axis=alt.Axis(labels=False), title=None), alt.Y('value', scale=alt.Scale(domain=(0, 1))), alt.Color('category', legend=alt.Legend(orient='top', offset=-20), title=None) ) # Create chart with difference between lines/timeseries dynamic_title = alt.Title(alt.expr(f'"Difference " + {select_x.name}.level_0 + " - " + {select_y.name}.level_1')) # We pivot transform to get each category as a column lines_diff = base.transform_pivot( 'category', 'value', groupby=['date'] # In the calculate transform we use the values from the selection to subset the columns to subtract ).transform_calculate( difference = f'datum[{select_x.name}.level_0] - datum[{select_y.name}.level_1]' ).mark_line(color='grey').encode( alt.X('date:T', axis=alt.Axis(format='%Y-%m-%d'), title=None), alt.Y('difference:Q', scale=alt.Scale(domain=(-1, 1))), ).properties( title=dynamic_title ) # Layout the charts (lines & lines_diff) | heatmap ================================================ FILE: tests/examples_arguments_syntax/interactive_cross_highlight.py ================================================ """ Interactive Chart with Cross-Highlight ====================================== This example shows an interactive chart where selections in one portion of the chart affect what is shown in other panels. Click on the bar chart to see a detail of the distribution in the upper panel. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.movies.url pts = alt.selection_point(encodings=['x']) rect = alt.Chart(data.movies.url).mark_rect().encode( alt.X('IMDB Rating:Q', bin=True), alt.Y('Rotten Tomatoes Rating:Q', bin=True), alt.Color('count()', scale=alt.Scale(scheme='greenblue'), legend=alt.Legend(title='Total Records') ) ) circ = rect.mark_point().encode( alt.ColorValue('grey'), alt.Size('count()', legend=alt.Legend(title='Records in Selection') ) ).transform_filter( pts ) bar = alt.Chart(source).mark_bar().encode( x='Major Genre:N', y='count()', color=alt.when(pts).then(alt.ColorValue("steelblue")).otherwise(alt.ColorValue("grey")) ).properties( width=550, height=200 ).add_params(pts) alt.vconcat( rect + circ, bar ).resolve_legend( color="independent", size="independent" ) ================================================ FILE: tests/examples_arguments_syntax/interactive_layered_crossfilter.py ================================================ """ Interactive Crossfilter ======================= This example shows a multi-panel view of the same data, where you can interactively select a portion of the data in any of the panels to highlight that portion in any of the other panels. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = alt.UrlData( data.flights_2k.url, format={'parse': {'date': 'date'}} ) brush = alt.selection_interval(encodings=['x']) # Define the base chart, with the common parts of the # background and highlights base = alt.Chart().mark_bar().encode( x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)), y='count()' ).properties( width=160, height=130 ) # gray background with selection background = base.encode( color=alt.value('#ddd') ).add_params(brush) # blue highlights on the transformed data highlight = base.transform_filter(brush) # layer the two charts & repeat alt.layer( background, highlight, data=source ).transform_calculate( "time", "hours(datum.date)" ).repeat(column=["distance", "delay", "time"]) ================================================ FILE: tests/examples_arguments_syntax/interactive_legend.py ================================================ """ Interactive Legend ------------------ The following shows how to create a chart with an interactive legend, by binding the selection to ``"legend"``. Such a binding only works with ``selection_point`` when projected over a single field or encoding. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url selection = alt.selection_point(fields=['series'], bind='legend') alt.Chart(source).mark_area().encode( alt.X('yearmonth(date):T', axis=alt.Axis(domain=False, format='%Y', tickSize=0)), alt.Y('sum(count):Q', stack='center', axis=None), alt.Color('series:N', scale=alt.Scale(scheme='category20b')), opacity=alt.when(selection).then(alt.value(1)).otherwise(alt.value(0.2)) ).add_params( selection ) ================================================ FILE: tests/examples_arguments_syntax/interactive_reorder_stacked_bars.py ================================================ """ Reorder stacked bar segments ============================ This example uses a calculate transform to check the values of the "site" column vs the clicked values in the legend, and assigns a lower order (0) if there is a match. The use of "indexOf" checks for equality in an array, which here allows for multiple segments to be reordered by holding down the shift key while clicking the legend. """ # category: interactive charts import altair as alt from altair.datasets import data selection = alt.selection_point(fields=['site'], bind='legend') source = data.barley.url alt.Chart(source).mark_bar().transform_calculate( site_order=f"if({selection.name}.site && indexof({selection.name}.site, datum.site) !== -1, 0, 1)" ).encode( x='sum(yield):Q', y='variety:N', color='site:N', order='site_order:N', opacity=alt.when(selection).then(alt.value(0.9)).otherwise(alt.value(0.2)) ).add_params( selection ) ================================================ FILE: tests/examples_arguments_syntax/interactive_scatter_plot.py ================================================ """ Simple Interactive Colored Scatter Plot --------------------------------------- This example shows how to make an interactive scatter plot. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_circle().encode( x='Horsepower', y='Miles_per_Gallon', color='Origin', ).interactive() ================================================ FILE: tests/examples_arguments_syntax/interval_selection.py ================================================ """ Interval Selection with Initial Date Range ========================================== This is an example of creating a stacked chart for which the domain of the top chart can be selected by interacting with the bottom chart. The initial selection range is set using Python's native datetime objects. """ # category: interactive charts import altair as alt from altair.datasets import data import datetime as dt source = data.sp500.url date_range = (dt.date(2007, 6, 30), dt.date(2009, 6, 30)) brush = alt.selection_interval(encodings=['x'], value={'x': date_range}) base = alt.Chart(source).mark_area().encode( x = 'date:T', y = 'price:Q' ).properties( width=600, height=200 ) upper = base.encode( alt.X('date:T', scale=alt.Scale(domain=brush)) ) lower = base.properties( height=60 ).add_params(brush) upper & lower ================================================ FILE: tests/examples_arguments_syntax/interval_selection_map_quakes.py ================================================ """ Interval Selection on a Map =========================== This is an example of a binned bar chart on the right where the filtered overlay is adjusted by interacting with the map on the left. 
""" # category: interactive charts import altair as alt from altair.datasets import data # load data gdf_quakies = data.earthquakes() gdf_world = data.world_110m(layer="countries") # definition for interactive brush brush = alt.selection_interval( encodings=["longitude"], empty=False, value={"longitude": [-50, -110]} ) # world disk sphere = alt.Chart(alt.sphere()).mark_geoshape( fill="transparent", stroke="lightgray", strokeWidth=1 ) # countries as shapes world = alt.Chart(gdf_world).mark_geoshape( fill="lightgray", stroke="white", strokeWidth=0.1 ) # earthquakes as dots on map quakes = alt.Chart(gdf_quakies).transform_calculate( lon="datum.geometry.coordinates[0]", lat="datum.geometry.coordinates[1]", ).mark_circle(opacity=0.35, tooltip=True).encode( longitude="lon:Q", latitude="lat:Q", color=alt.when(brush).then(alt.value("goldenrod")).otherwise(alt.value("steelblue")), size=alt.Size("mag:Q", scale=alt.Scale(type="pow", range=[1, 1000], domain=[0, 7], exponent=4)), ).add_params(brush) # combine layers for the map left_map = alt.layer(sphere, world, quakes).project(type="mercator") # histogram of binned earthquakes bars = alt.Chart(gdf_quakies).mark_bar().encode( x=alt.X("mag:Q").bin(extent=[0,7]), y="count(mag):Q", color=alt.value("steelblue") ) # filtered earthquakes bars_overlay = bars.encode(color=alt.value("goldenrod")).transform_filter(brush) # combine layers for histogram right_bars = alt.layer(bars, bars_overlay) # vertical concatenate map and bars left_map | right_bars ================================================ FILE: tests/examples_arguments_syntax/iowa_electricity.py ================================================ """ Iowa's Renewable Energy Boom ---------------------------- This example is a fully developed stacked chart using the sample dataset of Iowa's electricity sources. 
"""
# category: case studies
import altair as alt
from altair.datasets import data

source = data.iowa_electricity()

alt.Chart(source, title="Iowa's renewable energy boom").mark_area().encode(
    x=alt.X(
        "year:T",
        title="Year"
    ),
    y=alt.Y(
        "net_generation:Q",
        # Normalized stacking pairs with the ".0%" axis format below, so the
        # axis reads as a share rather than an absolute amount.
        stack="normalize",
        title="Share of net generation",
        axis=alt.Axis(format=".0%"),
    ),
    color=alt.Color(
        "source:N",
        legend=alt.Legend(title="Electricity source"),
    )
)

================================================
FILE: tests/examples_arguments_syntax/isotype.py
================================================
'''
Isotype Visualization
=====================
Isotype Visualization shows the distribution of animals across UK and US.
Inspired by `Only An Ocean Between, 1943 `_. Population Live Stock, p.13.
This is adapted from Vega-Lite example https://vega.github.io/editor/#/examples/vega-lite/isotype_bar_chart
'''
# category: advanced calculations
import altair as alt
import pandas as pd

# One record per drawn symbol: repeated rows are intentional, since every
# row becomes one point mark in the chart.
source = pd.DataFrame([
      {'country': 'Great Britain', 'animal': 'cattle'},
      {'country': 'Great Britain', 'animal': 'cattle'},
      {'country': 'Great Britain', 'animal': 'cattle'},
      {'country': 'Great Britain', 'animal': 'pigs'},
      {'country': 'Great Britain', 'animal': 'pigs'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 
'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'} ]) domains = ['person', 'cattle', 'pigs', 'sheep'] shape_scale = alt.Scale( domain=domains, range=[ 'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z', 'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 
0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z', 'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 -0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z', 'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z' ] ) 
# Colour scale over the same `domains` list as the shape scale, so each
# category gets a matching shape/colour pair.
color_scale = alt.Scale(
    domain=domains,
    range=['rgb(162,160,152)', 'rgb(194,81,64)', 'rgb(93,93,93)', 'rgb(91,131,149)']
)

# Each record becomes one filled point. The `x` position field does not
# exist in `source`; it is produced by the window transform below
# (rank() within each country/animal group).
alt.Chart(source).mark_point(filled=True, opacity=1, size=100).encode(
    alt.X('x:O', axis=None),
    alt.Y('animal:O', axis=None),
    alt.Row('country:N', header=alt.Header(title='')),
    alt.Shape('animal:N', legend=None, scale=shape_scale),
    alt.Color('animal:N', legend=None, scale=color_scale),
).transform_window(
    x='rank()',
    groupby=['country', 'animal']
).properties(width=550, height=140)

================================================
FILE: tests/examples_arguments_syntax/isotype_emoji.py
================================================
'''
Isotype Visualization with Emoji
================================
Isotype Visualization shows the distribution of animals across UK and US, using unicode emoji
marks rather than custom SVG paths (see https://altair-viz.github.io/gallery/isotype.html).
This is adapted from Vega-Lite example https://vega.github.io/vega-lite/examples/isotype_bar_chart_emoji.html.
'''
# category: advanced calculations
import altair as alt
import pandas as pd

# One record per drawn emoji: repeated rows are intentional, since every
# row becomes one text mark in the chart.
source = pd.DataFrame([
      {'country': 'Great Britain', 'animal': 'cattle'},
      {'country': 'Great Britain', 'animal': 'cattle'},
      {'country': 'Great Britain', 'animal': 'cattle'},
      {'country': 'Great Britain', 'animal': 'pigs'},
      {'country': 'Great Britain', 'animal': 'pigs'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'Great Britain', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'cattle'},
      {'country': 'United States', 'animal': 'pigs'},
      {'country': 'United States', 'animal': 'pigs'},
      {'country': 'United States', 'animal': 'pigs'},
      {'country': 'United States', 'animal': 'pigs'},
      {'country': 'United States', 'animal': 'pigs'},
      {'country': 'United States', 'animal': 'pigs'},
      {'country': 'United States', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'sheep'},
      {'country': 'United States', 'animal': 'sheep'}
    ])


alt.Chart(source).mark_text(size=45, baseline='middle').encode(
    alt.X('x:O', 
axis=None),
    alt.Y('animal:O', axis=None),
    alt.Row('country:N', header=alt.Header(title='')),
    alt.Text('emoji:N')
).transform_calculate(
    # Look up the emoji for each record's animal from an inline object.
    emoji="{'cattle': '🐄', 'pigs': '🐖', 'sheep': '🐏'}[datum.animal]"
).transform_window(
    # `x` (used by the X channel) is produced here, per country/animal group.
    x='rank()',
    groupby=['country', 'animal']
).properties(width=550, height=140)

================================================
FILE: tests/examples_arguments_syntax/isotype_grid.py
================================================
"""
Isotype Grid
------------
This example is a grid of isotype figures.
"""
# category: advanced calculations
import altair as alt
import pandas as pd

# 100 records with ids 1..100, one per figure in the grid.
data = pd.DataFrame([dict(id=i) for i in range(1, 101)])

# SVG path string used as the custom point shape; adjacent literals are
# concatenated into a single string.
person = (
    "M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 "
    "-0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 "
    "0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 "
    "0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 "
    "0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 "
    "-0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 "
    "-0.6 -0.4 -0.6z"
)

# Two separate calculate steps: `col` reads `datum.row`, so `row` has to be
# computed first.
alt.Chart(data).transform_calculate(
    row="ceil(datum.id/10)"
).transform_calculate(
    col="datum.id - datum.row*10"
).mark_point(
    filled=True,
    size=50
).encode(
    x=alt.X("col:O", axis=None),
    y=alt.Y("row:O", axis=None),
    shape=alt.ShapeValue(person)
).properties(
    width=400,
    height=400
).configure_view(
    strokeWidth=0
)

================================================
FILE: tests/examples_arguments_syntax/lasagna_plot.py
================================================
"""
Lasagna Plot (Dense Time-Series Heatmap)
----------------------------------------
"""
# category: tables
import altair as alt
from altair.datasets import data

source = data.stocks()

# Axis labels/ticks are black only where both tests hold, and None elsewhere.
# NOTE(review): Vega's month() is zero-based, so `== 1` matches February,
# not January — confirm which month the year labels are meant to sit on.
color_condition = (
    alt.when(alt.expr.month("datum.value") == 1, alt.expr.date("datum.value") == 1)
    .then(alt.value("black"))
    .otherwise(alt.value(None))
)

alt.Chart(source, width=300, height=100).transform_filter(
    alt.datum.symbol != "GOOG"
).mark_rect().encode(
    x=alt.X(
        "yearmonthdate(date):O",
        axis=alt.Axis(
            format="%Y", 
labelAngle=0,
            labelOverlap=False,
            # Both the label and the tick colour follow `color_condition`.
            labelColor=color_condition,
            tickColor=color_condition,
        ),
        title="Time",
    ),
    y=alt.Y("symbol:N", title=None),
    color=alt.Color("sum(price)", title="Price"),
)

================================================
FILE: tests/examples_arguments_syntax/layer_line_color_rule.py
================================================
"""
Line Chart with Layered Aggregates
----------------------------------
This example shows how to make a multi-series line chart of the daily closing
stock prices for AAPL, AMZN, GOOG, IBM, and MSFT between 2000 and 2010, along
with a layered rule showing the average values.
"""
# category: line charts
import altair as alt
from altair.datasets import data

source = data.stocks()

# Common base so both layers share the data and the width.
base = alt.Chart(source).properties(width=550)

line = base.mark_line().encode(
    x='date',
    y='price',
    color='symbol'
)

# One horizontal rule per symbol at its average price.
rule = base.mark_rule().encode(
    y='average(price)',
    color='symbol',
    size=alt.value(2)
)

line + rule

================================================
FILE: tests/examples_arguments_syntax/layered_area_chart.py
================================================
"""
Layered Area Chart
------------------
This example shows a layered area chart.
"""
# category: area charts
import altair as alt
from altair.datasets import data

source = data.iowa_electricity()

# `stack=None` disables stacking so the translucent areas overlap.
alt.Chart(source).mark_area(opacity=0.3).encode(
    x="year:T",
    y=alt.Y("net_generation:Q", stack=None),
    color="source:N"
)

================================================
FILE: tests/examples_arguments_syntax/layered_bar_chart.py
================================================
"""
Layered Bar Chart
-----------------
This example shows a segmented bar chart that is layered rather than stacked.
"""
# category: bar charts
import altair as alt
from altair.datasets import data

source = data.iowa_electricity()

# `stack=None` disables stacking so the translucent bars overlap.
alt.Chart(source).mark_bar(opacity=0.7).encode(
    x='year:O',
    y=alt.Y('net_generation:Q', stack=None),
    color="source",
)

================================================
FILE: tests/examples_arguments_syntax/layered_chart_bar_mark.py
================================================
"""
Bar and Tick Chart
------------------
How to layer a tick chart on top of a bar chart.
"""
# category: bar charts
import altair as alt
import pandas as pd

source = pd.DataFrame({
    'project': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
    'score': [25, 57, 23, 19, 8, 47, 8],
    'goal': [25, 47, 30, 27, 38, 19, 4]
})

bar = alt.Chart(source).mark_bar().encode(
    x='project',
    y='score'
).properties(
    width=alt.Step(40)  # controls width of bar.
)

# The tick size is written as a fraction of the 40 used for the bar step.
tick = alt.Chart(source).mark_tick(
    color='red',
    thickness=2,
    size=40 * 0.9,  # controls width of tick.
).encode(
    x='project',
    y='goal'
)

bar + tick

================================================
FILE: tests/examples_arguments_syntax/layered_chart_with_dual_axis.py
================================================
"""
Layered chart with Dual-Axis
----------------------------
This example shows how to create a second independent y axis.
"""
# category: advanced calculations
import altair as alt
from altair.datasets import data

source = data.seattle_weather()

# Shared x encoding for both layers.
base = alt.Chart(source).encode(
    alt.X('month(date):T', axis=alt.Axis(title=None))
)

# Band between average max (Y) and average min (Y2) temperature.
area = base.mark_area(opacity=0.3, color='#57A44C').encode(
    alt.Y('average(temp_max)',
          axis=alt.Axis(title='Avg. 
Temperature (°C)', titleColor='#57A44C')),
    alt.Y2('average(temp_min)')
)

# Precipitation line; its axis title colour matches the line's stroke.
line = base.mark_line(stroke='#5276A7', interpolate='monotone').encode(
    alt.Y('average(precipitation)',
          axis=alt.Axis(title='Precipitation (inches)', titleColor='#5276A7'))
)

# Resolve y independently so each layer gets its own axis.
alt.layer(area, line).resolve_scale(
    y = 'independent'
)

================================================
FILE: tests/examples_arguments_syntax/layered_heatmap_text.py
================================================
"""
Text over a Heatmap
-------------------

An example of a layered chart of text over a heatmap using the cars dataset.
"""
# category: tables
import altair as alt
from altair.datasets import data

source = data.cars()

# Configure common options. We specify the aggregation
# as a transform here so we can reuse it in both layers.
base = alt.Chart(source).transform_aggregate(
    mean_horsepower='mean(Horsepower)',
    groupby=['Origin', 'Cylinders']
).encode(
    alt.X('Cylinders:O'),
    alt.Y('Origin:O'),
)

# Configure heatmap
heatmap = base.mark_rect().encode(
    color=alt.Color('mean_horsepower:Q',
        scale=alt.Scale(scheme='viridis'),
        legend=alt.Legend(title="Mean of Horsepower"),
    )
)

# Text colour switches at a mean horsepower of 150: black above, white
# otherwise.
color = (
    alt.when(alt.datum.mean_horsepower > 150)
    .then(alt.value("black"))
    .otherwise(alt.value("white"))
)
# Configure text
text = base.mark_text(baseline='middle').encode(
    text=alt.Text('mean_horsepower:Q', format=".0f"),
    color=color
)

# Draw the chart
heatmap + text

================================================
FILE: tests/examples_arguments_syntax/layered_histogram.py
================================================
"""
Layered Histogram
=================
This example shows how to use opacity to make a layered histogram in Altair.
"""
# category: distributions
import pandas as pd
import altair as alt
import numpy as np
# Fixed seed so the generated samples are reproducible.
np.random.seed(42)

# Generating Data
source = pd.DataFrame({
    'Trial A': np.random.normal(0, 0.8, 1000),
    'Trial B': np.random.normal(-2, 1, 1000),
    'Trial C': np.random.normal(3, 2, 1000)
})

# Fold the three wide columns into (Experiment, Measurement) pairs so the
# trial can be used as a colour field.
alt.Chart(source).transform_fold(
    ['Trial A', 'Trial B', 'Trial C'],
    as_=['Experiment', 'Measurement']
).mark_bar(
    opacity=0.3,
    binSpacing=0
).encode(
    alt.X('Measurement:Q', bin=alt.Bin(maxbins=100)),
    # `stack=None` disables stacking so the histograms overlap.
    alt.Y('count()', stack=None),
    alt.Color('Experiment:N')
)

================================================
FILE: tests/examples_arguments_syntax/line_chart_with_arrows.py
================================================
"""
Line Chart with Arrows
----------------------
This example shows a simple line chart with two types of arrow annotations.
The Unicode character approach is simpler, while the line plus triangle :ref:`point mark `
allows for greater flexibility, such as customizable arrowheads.
"""
# category: line charts
import altair as alt
import numpy as np
import pandas as pd

x = np.linspace(1,7)
data = pd.DataFrame({
    "x": x,
    "y": np.sin(x),
})

# First annotation style: a single unicode arrow glyph plus a text label,
# both positioned with constant `datum` values.
unicode_arrow_with_text = alt.layer(
    # Arrow
    alt.Chart().mark_text(size=60, align="left", baseline="bottom", fontWeight=100, angle=340).encode(
        x=alt.datum(2.8),
        y=alt.datum(-0.3),
        text=alt.datum("🠃")  # Any unicode symbol could be used instead
    ),
    # Text
    alt.Chart().mark_text(size=14, align="center", baseline="bottom").encode(
        x=alt.datum(1.8),
        y=alt.datum(-0.1),
        text=alt.datum("decreasing")
    )
)

# Second annotation style: a line segment, a triangle point for the head,
# and a text label.
mark_arrow_with_text = alt.layer(
    # Arrow line
    alt.Chart().mark_line(size=2).encode(
        x=alt.datum(5.4),
        y=alt.datum(-0.4),
        x2=alt.datum(5.9),
        y2=alt.datum(0)
    ),
    # Arrow head
    alt.Chart().mark_point(shape="triangle", filled=True, fillOpacity=1).encode(
        x=alt.datum(5.9),
        y=alt.datum(0),
        angle=alt.AngleValue(23),
        size=alt.SizeValue(100),
        color=alt.ColorValue("#000000")
    ),
    # Text
    alt.Chart().mark_text(size=14, align="center", baseline="bottom").encode(
x=alt.datum(4.7),
        y=alt.datum(-0.3),
        text=alt.datum("increasing")
    ),
)

line_with_points = alt.Chart(data).mark_line(point=True).encode(
    x=alt.X("x"),
    y=alt.Y("y"),
)

# Layer the data line with both annotation groups.
line_with_points + unicode_arrow_with_text + mark_arrow_with_text

================================================
FILE: tests/examples_arguments_syntax/line_chart_with_color_datum.py
================================================
"""
Line Chart with Datum for Color
-------------------------------
An example of using ``datum`` and ``repeat`` to color a multi-series line chart.
This is adapted from this corresponding Vega-Lite Example:
`Repeat and Layer to Show Different Movie Measures `_.
"""
# category: line charts
import altair as alt
from altair.datasets import data

source = data.movies()

# The repeated layer field supplies both the y value and the colour datum.
alt.Chart(source).mark_line().encode(
    x=alt.X("IMDB Rating", bin=True),
    y=alt.Y(
        alt.repeat("layer"), aggregate="mean", title="Mean of US and Worldwide Gross"
    ),
    color=alt.datum(alt.repeat("layer")),
).repeat(layer=["US Gross", "Worldwide Gross"])

================================================
FILE: tests/examples_arguments_syntax/line_chart_with_cumsum.py
================================================
"""
Line Chart with Cumulative Sum
------------------------------
This chart creates a simple line chart from the cumulative sum of a field.
"""
# category: line charts
import altair as alt
from altair.datasets import data

source = data.wheat()

alt.Chart(source).mark_line().transform_window(
    # Sort the data chronologically
    sort=[{'field': 'year'}],
    # Include all previous records before the current record and none after
    # (This is the default value so you could skip it and it would still work.)
frame=[None, 0], # What to add up as you go cumulative_wheat='sum(wheat)' ).encode( x='year:O', # Plot the calculated field created by the transformation y='cumulative_wheat:Q' ).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/line_chart_with_cumsum_faceted.py ================================================ """ Faceted Line Chart with Cumulative Sum -------------------------------------- This chart creates one facet per natural disaster and shows the cumulative number of deaths for that category. Note the use of different predicates to filter based on both a list and a range. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.disasters() columns_sorted = ['Drought', 'Epidemic', 'Earthquake', 'Flood'] alt.Chart(source).transform_filter( alt.FieldOneOfPredicate(field='Entity', oneOf=columns_sorted), alt.FieldRangePredicate(field='Year', range=[1900, 2000]) ).transform_window( cumulative_deaths='sum(Deaths)', groupby=['Entity'] # Calculate cumulative sum of Deaths by Entity ).mark_line().encode( alt.X('Year:Q', title=None, axis=alt.Axis(format='d')), alt.Y('cumulative_deaths:Q', title=None), alt.Color('Entity:N', legend=None) ).properties( width=300, height=150 ).facet( facet=alt.Facet( 'Entity:N', title=None, sort=columns_sorted, header=alt.Header(labelAnchor='start', labelFontStyle='italic') ), title=alt.Title( text=['Cumulative casualties by type of disaster', 'in the 20th century'], anchor='middle' ), columns=2 ).resolve_axis(y='independent', x='independent') ================================================ FILE: tests/examples_arguments_syntax/line_chart_with_custom_legend.py ================================================ """ Line Chart with Custom Legend ----------------------------- This example uses the argmax aggregation function in order to create a custom legend for a line chart. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.stocks() base = alt.Chart(source).encode( color=alt.Color("symbol", legend=None) ).transform_filter( "datum.symbol !== 'IBM'" ).properties( width=500 ) line = base.mark_line().encode(x="date", y="price") last_price = base.mark_circle().encode( x=alt.X("last_date['date']:T"), y=alt.Y("last_date['price']:Q") ).transform_aggregate( last_date="argmax(date)", groupby=["symbol"] ) company_name = last_price.mark_text(align="left", dx=4).encode(text="symbol") chart = (line + last_price + company_name).encode( x=alt.X(title="date"), y=alt.Y(title="price") ) chart ================================================ FILE: tests/examples_arguments_syntax/line_chart_with_datum.py ================================================ """ Line Chart with Datum --------------------------------- An example of using ``datum`` to highlight certain values, including a ``DateTime`` value. This is adapted from two corresponding Vega-Lite Examples: `Highlight a Specific Value `_. """ # category: line charts import altair as alt from altair.datasets import data source = data.stocks() lines = ( alt.Chart(source) .mark_line() .encode(x="date", y="price", color="symbol") ) xrule = ( alt.Chart() .mark_rule(color="cyan", strokeWidth=2) .encode(x=alt.datum(alt.DateTime(year=2006, month="November"))) ) yrule = ( alt.Chart().mark_rule(strokeDash=[12, 6], size=2).encode(y=alt.datum(350)) ) lines + yrule + xrule ================================================ FILE: tests/examples_arguments_syntax/line_chart_with_generator.py ================================================ """ Line Chart with Sequence Generator ---------------------------------- This examples shows how to create multiple lines using the sequence generator. 
""" # category: line charts import altair as alt source = alt.sequence(start=0, stop=12.7, step=0.1, as_='x') alt.Chart(source).mark_line().transform_calculate( sin='sin(datum.x)', cos='cos(datum.x)' ).transform_fold( ['sin', 'cos'] ).encode( x='x:Q', y='value:Q', color='key:N' ) ================================================ FILE: tests/examples_arguments_syntax/line_chart_with_interpolation.py ================================================ """ Line Chart with Interpolation ----------------------------- This chart shows a line chart with the path interpolated. A full list of interpolation methods is available `in the documentation `_. """ # category: line charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line(interpolate="monotone").encode( x="date:T", y="price:Q", color="symbol:N" ) ================================================ FILE: tests/examples_arguments_syntax/line_chart_with_points.py ================================================ """ Line Chart with Point Markers ----------------------------- This chart shows a simple line chart with points marking each value. """ # category: line charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line(point=True).encode( x='date:T', y='price:Q', color='symbol:N' ) ================================================ FILE: tests/examples_arguments_syntax/line_chart_with_points_stroked.py ================================================ """ Line Chart with Stroked Point Markers ------------------------------------- This example shows a simple line chart with points in a different color. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line( point=alt.OverlayMarkDef(filled=False, fill="white") ).encode( x='date:T', y='price:Q', color='symbol:N' ) ================================================ FILE: tests/examples_arguments_syntax/line_custom_order.py ================================================ """ Line Chart with Custom Order ---------------------------- By default, the line's path (order of points in the line) is determined by data values on the temporal/ordinal field. However, a field can be mapped to the order channel for a custom path. For example, to show a pattern of data change over time between gasoline price and average miles driven per capita we use order channel to sort the points in the line by time field (year). The earliest year (1956) is one endpoint and the latest year (2010) is the other endpoint. This is based on Hannah Fairfield's article 'Driving Shifts Into Reverse'. See https://archive.nytimes.com/www.nytimes.com/imagepages/2010/05/02/business/02metrics.html. """ # category: line charts import altair as alt from altair.datasets import data source = data.driving() alt.Chart(source).mark_line(point=True).encode( alt.X("miles", scale=alt.Scale(zero=False)), alt.Y("gas", scale=alt.Scale(zero=False)), order="year", tooltip=["miles", "gas", "year"], ) ================================================ FILE: tests/examples_arguments_syntax/line_percent.py ================================================ """ Line Chart with Percent axis ---------------------------- This example shows how to format the tick labels of the y-axis of a chart as percentages. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.jobs.url alt.Chart(source).mark_line().encode( alt.X('year:O'), alt.Y('perc:Q', axis=alt.Axis(format='%')), color='sex:N' ).transform_filter( alt.datum.job == 'Welder' ) ================================================ FILE: tests/examples_arguments_syntax/line_with_ci.py ================================================ """ Line Chart with Confidence Interval Band ---------------------------------------- How to make a line chart with a bootstrapped 95% confidence interval band. """ # category: uncertainties and trends import altair as alt from altair.datasets import data source = data.cars() line = alt.Chart(source).mark_line().encode( x='Year', y='mean(Miles_per_Gallon)' ) band = alt.Chart(source).mark_errorband(extent='ci').encode( x='Year', y=alt.Y('Miles_per_Gallon', title='Miles/Gallon'), ) band + line ================================================ FILE: tests/examples_arguments_syntax/line_with_last_value_labeled.py ================================================ """ Line Chart with Last Value Labeled ---------------------------------- This chart shows a line chart with a label annotating the final value """ # category: line charts import altair as alt from altair.datasets import data # Import example data source = data.stocks() # Create a common chart object # Use `transform_filter` to reduce the dataset to clarify our example. Not required. 
chart = ( alt.Chart(source) .transform_filter(alt.datum.symbol != "IBM") .encode(color=alt.Color("symbol", legend=None)) ) # Draw the line line = chart.mark_line().encode(x="date:T", y="price:Q") # Use the `argmax` aggregate to limit the dataset to the final value label = chart.encode( x=alt.X("max(date):T"), y=alt.Y("price:Q", aggregate=alt.ArgmaxDef(argmax="date")), text="symbol", ) # Create a text label text = label.mark_text(align="left", dx=4) # Create a circle annotation circle = label.mark_circle() # Draw the chart with all the layers combined line + circle + text ================================================ FILE: tests/examples_arguments_syntax/line_with_log_scale.py ================================================ """ Line Chart with Logarithmic Scale --------------------------------- How to make a line chart on a `Logarithmic scale <https://en.wikipedia.org/wiki/Logarithmic_scale>`_. """ # category: line charts import altair as alt from altair.datasets import data source = data.population() alt.Chart(source).mark_line().encode( x='year:O', y=alt.Y( 'sum(people)', scale=alt.Scale(type="log") # Here the scale is applied ) ) ================================================ FILE: tests/examples_arguments_syntax/london_tube.py ================================================ """ London Tube Lines ================= This example shows the London tube lines against the background of the borough boundaries. It is based on the vega-lite example at https://vega.github.io/vega-lite/examples/geo_layer_line_london.html.
""" # category: case studies import altair as alt from altair.datasets import data boroughs = alt.topo_feature(data.london_boroughs.url, 'boroughs') tubelines = alt.topo_feature(data.london_tube_lines.url, 'line') centroids = data.london_centroids.url background = alt.Chart(boroughs).mark_geoshape( stroke='white', strokeWidth=2 ).encode( color=alt.value('#eee'), ).properties( width=700, height=500 ) labels = alt.Chart(centroids).mark_text().encode( longitude='cx:Q', latitude='cy:Q', text='bLabel:N', size=alt.value(8), opacity=alt.value(0.6) ).transform_calculate( "bLabel", "indexof (datum.name,' ') > 0 ? substring(datum.name,0,indexof(datum.name, ' ')) : datum.name" ) line_scale = alt.Scale(domain=["Bakerloo", "Central", "Circle", "District", "DLR", "Hammersmith & City", "Jubilee", "Metropolitan", "Northern", "Piccadilly", "Victoria", "Waterloo & City"], range=["rgb(137,78,36)", "rgb(220,36,30)", "rgb(255,206,0)", "rgb(1,114,41)", "rgb(0,175,173)", "rgb(215,153,175)", "rgb(106,114,120)", "rgb(114,17,84)", "rgb(0,0,0)", "rgb(0,24,168)", "rgb(0,160,226)", "rgb(106,187,170)"]) lines = alt.Chart(tubelines).mark_geoshape( filled=False, strokeWidth=2 ).encode( alt.Color( 'id:N', legend=alt.Legend( title=None, orient='bottom-right', offset=0 ), scale=line_scale ) ) background + labels + lines ================================================ FILE: tests/examples_arguments_syntax/maps_faceted_species.py ================================================ """ Faceted County-Level Choropleth Maps ------------------------------------ A set of maps arranged in a grid, each showing the distribution of a species' projected habitat across US counties. Each choropleth map uses color intensity to represent the percentage values within county boundaries. 
""" # category: maps import altair as alt from altair.datasets import data # Load species data with county_id as number csv_data = alt.UrlData(data.species.url, format=alt.CsvDataFormat(parse={'county_id': 'number'})) # Load US counties topology counties = alt.topo_feature(data.us_10m.url, 'counties') chart = alt.Chart(csv_data).mark_geoshape().encode( shape='geo:G', # Geographic shape encoding for map rendering color=alt.Color( 'habitat_yearround_pct:Q', scale=alt.Scale(domain=[0, 1], scheme='viridis', zero=True, nice=False), title=['Suitable Habitat', '% of County'], legend=alt.Legend(format='.0%') ), tooltip=[ alt.Tooltip('county_id:N', title='County ID'), alt.Tooltip('habitat_yearround_pct:Q', title='Habitat %', format='.2%') ], facet=alt.Facet('common_name:N', columns=2, title=None), ).transform_lookup( lookup='county_id', from_=alt.LookupData(data=counties, key='id'), as_='geo' # Join county geometry data ).project(type='albers').properties(width=300, height=200) # Display the chart chart ================================================ FILE: tests/examples_arguments_syntax/mosaic_with_labels.py ================================================ """ Mosaic Chart with Labels ------------------------ """ # category: tables import altair as alt from altair.datasets import data source = data.cars() base = ( alt.Chart(source) .transform_aggregate(count_="count()", groupby=["Origin", "Cylinders"]) .transform_stack( stack="count_", as_=["stack_count_Origin1", "stack_count_Origin2"], offset="normalize", sort=[alt.SortField("Origin", "ascending")], groupby=[], ) .transform_window( x="min(stack_count_Origin1)", x2="max(stack_count_Origin2)", rank_Cylinders="dense_rank()", distinct_Cylinders="distinct(Cylinders)", groupby=["Origin"], frame=[None, None], sort=[alt.SortField("Cylinders", "ascending")], ) .transform_window( rank_Origin="dense_rank()", frame=[None, None], sort=[alt.SortField("Origin", "ascending")], ) .transform_stack( stack="count_", groupby=["Origin"], 
as_=["y", "y2"], offset="normalize", sort=[alt.SortField("Cylinders", "ascending")], ) .transform_calculate( ny="datum.y + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3", ny2="datum.y2 + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3", nx="datum.x + (datum.rank_Origin - 1) * 0.01", nx2="datum.x2 + (datum.rank_Origin - 1) * 0.01", xc="(datum.nx+datum.nx2)/2", yc="(datum.ny+datum.ny2)/2", ) ) rect = base.mark_rect().encode( x=alt.X("nx:Q", axis=None), x2="nx2", y="ny:Q", y2="ny2", color=alt.Color("Origin:N", legend=None), opacity=alt.Opacity("Cylinders:Q", legend=None), tooltip=["Origin:N", "Cylinders:Q"], ) text = base.mark_text(baseline="middle").encode( x=alt.X("xc:Q", axis=None), y=alt.Y("yc:Q", title="Cylinders"), text="Cylinders:N" ) mosaic = rect + text origin_labels = base.mark_text(baseline="middle", align="center").encode( x=alt.X( "min(xc):Q", axis=alt.Axis(title="Origin", orient="top"), ), color=alt.Color("Origin", legend=None), text="Origin", ) ( (origin_labels & mosaic) .resolve_scale(x="shared") .configure_view(stroke="") .configure_concat(spacing=10) .configure_axis(domain=False, ticks=False, labels=False, grid=False) ) ================================================ FILE: tests/examples_arguments_syntax/multi_series_line.py ================================================ """ Multiple Series Line Chart -------------------------- This example shows how to make a line chart with multiple series of data. """ # category: line charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line().encode( x='date:T', y='price:Q', color='symbol:N', ) ================================================ FILE: tests/examples_arguments_syntax/multifeature_scatter_plot.py ================================================ """ Multifeature Scatter Plot ========================= This example shows how to make a scatter plot with multiple feature encodings. 
""" # category: scatter plots import altair as alt from altair.datasets import data source = data.penguins() alt.Chart(source).mark_circle().encode( alt.X('Flipper Length (mm)', scale=alt.Scale(zero=False)), alt.Y('Body Mass (g)', scale=alt.Scale(zero=False, padding=1)), alt.Size('Beak Depth (mm)', scale=alt.Scale(zero=False)), color='Species' ) ================================================ FILE: tests/examples_arguments_syntax/multiline_highlight.py ================================================ """ Multi-Line Highlight ==================== This multi-line chart uses an invisible Voronoi tessellation to handle pointerover to identify the nearest point and then highlight the line on which the point falls. It is adapted from the Vega-Lite example found at https://bl.ocks.org/amitkaps/fe4238e716db53930b2f1a70d3401701 """ # category: interactive charts import altair as alt from altair.datasets import data source = data.stocks() highlight = alt.selection_point(on='pointerover', fields=['symbol'], nearest=True) base = alt.Chart(source).encode( x='date:T', y='price:Q', color='symbol:N' ) points = base.mark_circle().encode( opacity=alt.value(0) ).add_params( highlight ).properties( width=600 ) lines = base.mark_line().encode( size=alt.when(~highlight).then(alt.value(1)).otherwise(alt.value(3)) ) points + lines ================================================ FILE: tests/examples_arguments_syntax/multiline_tooltip.py ================================================ """ Multi-Line Tooltip ================== This example shows how you can use selections and layers to create a tooltip-like behavior tied to the x position of the cursor. If you are looking for more standard tooltips, see the :ref:`gallery_multiline_tooltip_standard` example. In this example, we'll employ a little trick to isolate the x-position of the cursor: we add some transparent points with only an x encoding (no y encoding) and tie a *nearest* selection to these, tied to the "x" field. 
""" # category: interactive charts import altair as alt import pandas as pd import numpy as np np.random.seed(42) columns = ["A", "B", "C"] source = pd.DataFrame( np.cumsum(np.random.randn(100, 3), 0).round(2), columns=columns, index=pd.RangeIndex(100, name="x") ) source = source.reset_index().melt("x", var_name="category", value_name="y") # Create a selection that chooses the nearest point & selects based on x-value nearest = alt.selection_point(nearest=True, on="pointerover", fields=["x"], empty=False) # The basic line line = alt.Chart(source).mark_line(interpolate="basis").encode( x="x:Q", y="y:Q", color="category:N" ) # Transparent selectors across the chart. This is what tells us # the x-value of the cursor selectors = alt.Chart(source).mark_point().encode( x="x:Q", opacity=alt.value(0), ).add_params( nearest ) when_near = alt.when(nearest) # Draw points on the line, and highlight based on selection points = line.mark_point().encode( opacity=when_near.then(alt.value(1)).otherwise(alt.value(0)) ) # Draw text labels near the points, and highlight based on selection text = line.mark_text(align="left", dx=5, dy=-5).encode( text=when_near.then("y:Q").otherwise(alt.value(" ")) ) # Draw a rule at the location of the selection rules = alt.Chart(source).mark_rule(color="gray").encode( x="x:Q", ).transform_filter( nearest ) # Put the five layers into a chart and bind the data alt.layer( line, selectors, points, rules, text ).properties( width=600, height=300 ) ================================================ FILE: tests/examples_arguments_syntax/multiline_tooltip_standard.py ================================================ """ Multi-Line Tooltip (Standard) ============================= This example shows how to add a standard tooltip to the same chart as in :ref:`gallery_multiline_tooltip`. You can find another example using this approach in the documentation on the :ref:`user-guide-pivot-transform` transformation. 
""" # category: interactive charts import altair as alt import pandas as pd import numpy as np np.random.seed(42) columns = ["A", "B", "C"] source = pd.DataFrame( np.cumsum(np.random.randn(100, 3), 0).round(2), columns=columns, index=pd.RangeIndex(100, name="x"), ) source = source.reset_index().melt("x", var_name="category", value_name="y") # Create a selection that chooses the nearest point & selects based on x-value nearest = alt.selection_point(nearest=True, on="pointerover", fields=["x"], empty=False) # The basic line line = alt.Chart(source).mark_line(interpolate="basis").encode( x="x:Q", y="y:Q", color="category:N" ) when_near = alt.when(nearest) # Draw points on the line, and highlight based on selection points = line.mark_point().encode( opacity=when_near.then(alt.value(1)).otherwise(alt.value(0)) ) # Draw a rule at the location of the selection rules = alt.Chart(source).transform_pivot( "category", value="y", groupby=["x"] ).mark_rule(color="gray").encode( x="x:Q", opacity=when_near.then(alt.value(0.3)).otherwise(alt.value(0)), tooltip=[alt.Tooltip(c, type="quantitative") for c in columns], ).add_params(nearest) # Put the five layers into a chart and bind the data alt.layer( line, points, rules ).properties( width=600, height=300 ) ================================================ FILE: tests/examples_arguments_syntax/multiple_interactions.py ================================================ """ Multiple Interactions ===================== This example shows how multiple user inputs can be layered onto a chart. The four inputs have functionality as follows: * Dropdown: Filters the movies by genre * Radio Buttons: Highlights certain films by Worldwide Gross * Mouse Drag and Scroll: Zooms the x and y scales to allow for panning. 
""" # category: interactive charts import altair as alt from altair.datasets import data movies = alt.UrlData( data.movies.url, format=alt.DataFormat(parse={'Release Date':'date'}) ) ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R'] genres = ['Action', 'Adventure', 'Black Comedy', 'Comedy', 'Concert/Performance', 'Documentary', 'Drama', 'Horror', 'Musical', 'Romantic Comedy', 'Thriller/Suspense', 'Western'] base = alt.Chart(movies, width=200, height=200).mark_point(filled=True).transform_calculate( Hundred_Million_Production = "datum['Production Budget'] > 100000000.0 ? 100 : 10", Release_Year = "year(datum['Release Date'])" ).transform_filter( alt.datum['IMDB Rating'] > 0 ).transform_filter( alt.FieldOneOfPredicate(field='MPAA Rating', oneOf=ratings) ).encode( x=alt.X('Worldwide Gross:Q', scale=alt.Scale(domain=(100000,10**9), clamp=True)), y='IMDB Rating:Q', tooltip='Title:N' ) # A slider filter year_slider = alt.binding_range(min=1969, max=2018, step=1, name='Release Year') slider_selection = alt.selection_point(bind=year_slider, fields=['Release_Year']) filter_year = base.add_params( slider_selection ).transform_filter( slider_selection ).properties(title='Slider Filtering') # A dropdown filter genre_dropdown = alt.binding_select(options=genres, name='Genre') genre_select = alt.selection_point(fields=['Major Genre'], bind=genre_dropdown) filter_genres = base.add_params( genre_select ).transform_filter( genre_select ).properties(title='Dropdown Filtering') #color changing marks rating_radio = alt.binding_radio(options=ratings, name='Rating') rating_select = alt.selection_point(fields=['MPAA Rating'], bind=rating_radio) rating_color = ( alt.when(rating_select) .then(alt.Color('MPAA Rating:N', legend=None)) .otherwise(alt.value('lightgray')) ) highlight_ratings = base.add_params( rating_select ).encode( color=rating_color ).properties(title='Radio Button Highlighting') # Boolean selection for format changes input_checkbox = alt.binding_checkbox(name='Big Budget Films 
') checkbox_selection = alt.param(bind=input_checkbox) size_checkbox = ( alt.when(checkbox_selection) .then(alt.Size('Big Budget Film:N', scale=alt.Scale(range=[25, 150]))) .otherwise(alt.value(25)) ) budget_sizing = base.add_params( checkbox_selection ).encode( size=size_checkbox ).properties(title='Checkbox Formatting') (filter_year | budget_sizing) & (highlight_ratings | filter_genres) ================================================ FILE: tests/examples_arguments_syntax/natural_disasters.py ================================================ """ Global Deaths from Natural Disasters ------------------------------------ This example shows a proportional symbols visualization of deaths from natural disasters by year and type. """ # category: case studies import altair as alt from altair.datasets import data source = data.disasters.url alt.Chart(source).transform_filter( alt.datum.Entity != 'All natural disasters' ).mark_circle( opacity=0.8, stroke='black', strokeWidth=1, strokeOpacity=0.4 ).encode( x=alt.X('Year:T', title=None, scale=alt.Scale(domain=['1899','2018'])), y=alt.Y( 'Entity:N', sort=alt.EncodingSortField(field="Deaths", op="sum", order='descending'), title=None ), size=alt.Size('Deaths:Q', scale=alt.Scale(range=[0, 2500]), legend=alt.Legend(title='Deaths', clipHeight=30, format='s') ), color=alt.Color('Entity:N', legend=None), tooltip=[ "Entity:N", alt.Tooltip("Year:T", format='%Y'), alt.Tooltip("Deaths:Q", format='~s') ], ).properties( width=450, height=320, title=alt.Title( text="Global Deaths from Natural Disasters (1900-2017)", subtitle="The size of the bubble represents the total death count per year, by type of disaster", anchor='start' ) ).configure_axisY( domain=False, ticks=False, offset=10 ).configure_axisX( grid=False, ).configure_view( stroke=None ) ================================================ FILE: tests/examples_arguments_syntax/normalized_stacked_area_chart.py ================================================ """ Normalized Stacked Area 
Chart ----------------------------- This example shows how to make a normalized stacked area chart. """ # category: area charts import altair as alt from altair.datasets import data source = data.iowa_electricity() alt.Chart(source).mark_area().encode( x="year:T", y=alt.Y("net_generation:Q", stack="normalize"), color="source:N" ) ================================================ FILE: tests/examples_arguments_syntax/normalized_stacked_bar_chart.py ================================================ """ Normalized Stacked Bar Chart ---------------------------- This is an example of a normalized stacked bar chart using data which contains crop yields over different regions and different years in the 1930s. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x=alt.X('sum(yield)', stack="normalize"), y='variety', color='site' ) ================================================ FILE: tests/examples_arguments_syntax/normed_parallel_coordinates.py ================================================ """ Normalized Parallel Coordinates ------------------------------- A `Parallel Coordinates <https://en.wikipedia.org/wiki/Parallel_coordinates>`_ chart is a chart that lets you visualize the individual data points by drawing a single line for each of them. Such a chart can be created in Altair by first transforming the data into a suitable representation. This example shows a modified parallel coordinates chart with the Penguins dataset, where the y-axis shows the value after min-max normalization rather than the raw value.
It's a simplified Altair version of `the VegaLite version <https://vega.github.io/vega-lite/examples/parallel_coordinate.html>`_ """ # category: advanced calculations import altair as alt from altair.datasets import data from altair import datum source = data.penguins() alt.Chart(source).transform_window( index='count()' ).transform_fold( ['Beak Length (mm)', 'Beak Depth (mm)', 'Flipper Length (mm)'] ).transform_joinaggregate( min='min(value)', max='max(value)', groupby=['key'] ).transform_calculate( minmax_value=(datum.value-datum.min)/(datum.max-datum.min), mid=(datum.min+datum.max)/2 ).mark_line().encode( x='key:N', y='minmax_value:Q', color='Species:N', detail='index:N', opacity=alt.value(0.5) ).properties(width=500) ================================================ FILE: tests/examples_arguments_syntax/one_dot_per_zipcode.py ================================================ """ One Dot Per Zipcode ----------------------- This example shows a geographical plot with one dot per zipcode. """ # category: case studies import altair as alt from altair.datasets import data # Since the data is more than 5,000 rows we'll import it from a URL source = data.zipcodes.url alt.Chart(source).transform_calculate( "leading digit", alt.expr.substring(alt.datum.zip_code, 0, 1) ).mark_circle(size=3).encode( longitude='longitude:Q', latitude='latitude:Q', color='leading digit:N', tooltip='zip_code:N' ).project( type='albersUsa' ).properties( width=650, height=400 ) ================================================ FILE: tests/examples_arguments_syntax/pacman_chart.py ================================================ """ Pacman Chart ------------ Chart made using ``mark_arc`` and constant values. This could also be made using ``alt.Chart(source).mark_arc(color = "gold", theta = (5/8)*np.pi, theta2 = (19/8)*np.pi,radius=100)``.
""" # category: circular plots import numpy as np import altair as alt alt.Chart().mark_arc(color="gold").encode( theta=alt.datum((5 / 8) * np.pi, scale=None), theta2=alt.datum((19 / 8) * np.pi), radius=alt.datum(100, scale=None), ) ================================================ FILE: tests/examples_arguments_syntax/parallel_coordinates.py ================================================ """ Parallel Coordinates -------------------- A `Parallel Coordinates `_ chart is a chart that lets you visualize the individual data points by drawing a single line for each of them. Such a chart can be created in Altair by first transforming the data into a suitable representation. This example shows a parallel coordinates chart with the Penguins dataset. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.penguins() alt.Chart(source).transform_window( index='count()' ).transform_fold( ['Beak Length (mm)', 'Beak Depth (mm)', 'Flipper Length (mm)'] ).mark_line().encode( x='key:N', y='value:Q', color='Species:N', detail='index:N', opacity=alt.value(0.5) ).properties(width=500) ================================================ FILE: tests/examples_arguments_syntax/percentage_of_total.py ================================================ """ Calculating Percentage of Total ------------------------------- This chart demonstrates how to use a joinaggregate transform to display data values as a percentage of total. 
""" # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame({'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'], 'Time': [8, 2, 4, 8, 2]}) alt.Chart(source).transform_joinaggregate( TotalTime='sum(Time)', ).transform_calculate( PercentOfTotal="datum.Time / datum.TotalTime" ).mark_bar().encode( alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')), y='Activity:N' ) ================================================ FILE: tests/examples_arguments_syntax/pie_chart.py ================================================ """ Pie Chart --------- This example shows how to make a Pie Chart using ``mark_arc``. This is adapted from a corresponding Vega-Lite Example: `Pie Chart `_. """ # category: circular plots import pandas as pd import altair as alt source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) alt.Chart(source).mark_arc().encode( theta=alt.Theta(field="value", type="quantitative"), color=alt.Color(field="category", type="nominal"), ) ================================================ FILE: tests/examples_arguments_syntax/pie_chart_with_labels.py ================================================ """ Pie Chart with Labels --------------------- This example shows how to layer text over arc marks (``mark_arc``) to label pie charts. This is adapted from a corresponding Vega-Lite Example: `Pie Chart with Labels `_. 
""" # category: circular plots import pandas as pd import altair as alt source = pd.DataFrame( {"category": ["a", "b", "c", "d", "e", "f"], "value": [4, 6, 10, 3, 7, 8]} ) base = alt.Chart(source).encode( theta=alt.Theta("value:Q", stack=True), color=alt.Color("category:N", legend=None) ) pie = base.mark_arc(outerRadius=120) text = base.mark_text(radius=140, size=20).encode(text="category:N") pie + text ================================================ FILE: tests/examples_arguments_syntax/point_map.py ================================================ """ Point map ========= This is a layered map that shows the positions of airports on a background of U.S. states. """ # category: maps import altair as alt from altair.datasets import data # Read in points airports = data.airports() # Read in polygons from topojson states = alt.topo_feature(data.us_10m.url, feature='states') # US states background background = alt.Chart(states).mark_geoshape( fill='lightgray', stroke='white' ).properties( width=500, height=300 ).project('albersUsa') # airport positions on background points = alt.Chart(airports).mark_circle( size=10, color='steelblue' ).encode( longitude='longitude:Q', latitude='latitude:Q', tooltip=['name', 'city', 'state'] ) background + points ================================================ FILE: tests/examples_arguments_syntax/polar_bar_chart.py ================================================ """ Polar Bar Chart --------------- This example shows how to make a polar bar chart using ``mark_arc``. This could also have been called a "pie chart with axis labels", but is more commonly referred to as a polar bar chart. The axis lines are created using pie charts with only the stroke visible. 
""" # category: circular plots import math import altair as alt import pandas as pd source = pd.DataFrame({ "hour": range(24), "observations": [2, 2, 2, 2, 2, 3, 4, 4, 8, 8, 9, 7, 5, 6, 8, 8, 7, 7, 4, 3, 3, 2, 2, 2] }) polar_bars = alt.Chart(source).mark_arc(stroke='white', tooltip=True).encode( theta=alt.Theta("hour:O"), radius=alt.Radius('observations', scale=alt.Scale(type='linear')), radius2=alt.datum(1), ) # Create the circular axis lines for the number of observations axis_rings = alt.Chart(pd.DataFrame({"ring": range(2, 11, 2)})).mark_arc(stroke='lightgrey', fill=None).encode( theta=alt.value(2 * math.pi), radius=alt.Radius('ring', stack=False) ) axis_rings_labels = axis_rings.mark_text(color='grey', radiusOffset=5, align='left').encode( text="ring", theta=alt.value(math.pi / 4) ) # Create the straight axis lines for the time of the day axis_lines = alt.Chart(pd.DataFrame({ "radius": 10, "theta": math.pi / 2, 'hour': ['00:00', '06:00', '12:00', '18:00'] })).mark_arc(stroke='lightgrey', fill=None).encode( theta=alt.Theta('theta', stack=True), radius=alt.Radius('radius'), radius2=alt.datum(1), ) axis_lines_labels = axis_lines.mark_text( color='grey', radiusOffset=5, thetaOffset=-math.pi / 4, # These adjustments could be left out with a larger radius offset, but they make the label positioning a bit cleaner align=alt.expr('datum.hour == "18:00" ? "right" : datum.hour == "06:00" ? "left" : "center"'), baseline=alt.expr('datum.hour == "00:00" ? "bottom" : datum.hour == "12:00" ? 
"top" : "middle"'), ).encode(text="hour") alt.layer( axis_rings, polar_bars, axis_rings_labels, axis_lines, axis_lines_labels, title=['Observations throughout the day', ''] ) ================================================ FILE: tests/examples_arguments_syntax/poly_fit_regression.py ================================================ """ Polynomial Fit Plot with Regression Transform ============================================= This example shows how to overlay data with multiple fitted polynomials using the regression transform. """ # category: uncertainties and trends import numpy as np import pandas as pd import altair as alt # Generate some random data rng = np.random.RandomState(1) x = rng.rand(40) ** 2 y = 10 - 1.0 / (x + 0.1) + rng.randn(40) source = pd.DataFrame({"x": x, "y": y}) # Define the degree of the polynomial fits degree_list = [1, 3, 5] base = alt.Chart(source).mark_circle(color="black").encode( alt.X("x"), alt.Y("y") ) polynomial_fit = [ base.transform_regression( "x", "y", method="poly", order=order, as_=["x", str(order)] ) .mark_line() .transform_fold([str(order)], as_=["degree", "y"]) .encode(alt.Color("degree:N")) for order in degree_list ] alt.layer(base, *polynomial_fit) ================================================ FILE: tests/examples_arguments_syntax/pyramid.py ================================================ """ Pyramid Pie Chart ----------------- Altair reproduction of http://robslink.com/SAS/democd91/pyramid_pie.htm """ # category: case studies import altair as alt import pandas as pd category = ['Sky', 'Shady side of a pyramid', 'Sunny side of a pyramid'] color = ["#416D9D", "#674028", "#DEAC58"] df = pd.DataFrame({'category': category, 'value': [75, 10, 15]}) alt.Chart(df).mark_arc(outerRadius=80).encode( alt.Theta('value:Q', scale=alt.Scale(range=[2.356, 8.639])), alt.Color('category:N', scale=alt.Scale(domain=category, range=color), legend=alt.Legend(title=None, orient='none', legendX=160, legendY=50)), order='value:Q' 
).properties(width=150, height=150).configure_view(strokeOpacity=0) ================================================ FILE: tests/examples_arguments_syntax/radial_chart.py ================================================ """ Radial Chart ------------ This radial plot uses both angular and radial extent to convey multiple dimensions of data. This is adapted from a corresponding Vega-Lite Example: `Radial Plot <https://vega.github.io/vega-lite/examples/arc_radial.html>`_. """ # category: circular plots import pandas as pd import altair as alt source = pd.DataFrame({"values": [12, 23, 47, 6, 52, 19]}) base = alt.Chart(source).encode( theta=alt.Theta("values:Q", stack=True), radius=alt.Radius("values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)), color="values:N", ) c1 = base.mark_arc(innerRadius=20, stroke="#fff") c2 = base.mark_text(radiusOffset=10).encode(text="values:Q") c1 + c2 ================================================ FILE: tests/examples_arguments_syntax/ranged_dot_plot.py ================================================ """ Ranged Dot Plot --------------- This example shows a ranged dot plot to convey changing life expectancy for the five most populous countries (between 1955 and 2000).
""" # category: advanced calculations import altair as alt from altair.datasets import data source = data.countries.url chart = ( alt.Chart(source) .encode(x="life_expect:Q", y="country:N") .transform_filter( alt.FieldOneOfPredicate( field="country", oneOf=["China", "India", "United States", "Indonesia", "Brazil"], ) ) .transform_filter(alt.FieldOneOfPredicate(field="year", oneOf=[1955, 2000])) ) line = chart.mark_line(color="#db646f").encode(detail="country:N") # Add points for life expectancy in 1955 & 2000 color = alt.Color( "year:O", scale=alt.Scale(domain=[1955, 2000], range=["#e6959c", "#911a24"]) ) points = ( chart.mark_point( size=100, opacity=1, filled=True, ) .encode(color=color) .interactive() ) (line + points) ================================================ FILE: tests/examples_arguments_syntax/ridgeline_plot.py ================================================ """ Ridgeline plot -------------- A `Ridgeline plot `_ lets you visualize distribution of a numeric value for different subsets of data (what we call "facets" in Altair). Such a chart can be created in Altair by first transforming the data into a suitable representation. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.seattle_weather.url step = 20 overlap = 1 alt.Chart(source, height=step).transform_timeunit( Month='month(date)' ).transform_joinaggregate( mean_temp='mean(temp_max)', groupby=['Month'] ).transform_bin( ['bin_max', 'bin_min'], 'temp_max' ).transform_aggregate( value='count()', groupby=['Month', 'mean_temp', 'bin_min', 'bin_max'] ).transform_impute( impute='value', groupby=['Month', 'mean_temp'], key='bin_min', value=0 ).mark_area( interpolate='monotone', fillOpacity=0.8, stroke='lightgray', strokeWidth=0.5 ).encode( alt.X('bin_min:Q', bin='binned', title='Maximum Daily Temperature (C)'), alt.Y( 'value:Q', scale=alt.Scale(range=[step, -step * overlap]), axis=None ), alt.Fill( 'mean_temp:Q', legend=None, scale=alt.Scale(domain=[30, 5], scheme='redyellowblue') ) ).facet( row=alt.Row( 'Month:T', title=None, header=alt.Header(labelAngle=0, labelAlign='left', format='%B') ) ).properties( title='Seattle Weather', bounds='flush' ).configure_facet( spacing=0 ).configure_view( stroke=None ).configure_title( anchor='end' ) ================================================ FILE: tests/examples_arguments_syntax/scatter_faceted.py ================================================ """ Faceted Scatter Plot -------------------- A series of scatter plots, one for each country/area of origin. """ # category: scatter plots import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source, width=100, height=100).mark_point().encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", row="Origin:N", ) ================================================ FILE: tests/examples_arguments_syntax/scatter_href.py ================================================ """ Scatter Plot with Href ---------------------- This example shows a scatter plot with an ``href`` encoding constructed from the car name. 
With this encoding, you can click on any of the points to open a google search for the car name. """ # category: scatter plots import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).transform_calculate( url='https://www.google.com/search?q=' + alt.datum.Name ).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color='Origin:N', href='url:N', tooltip=['Name:N', 'url:N'] ) ================================================ FILE: tests/examples_arguments_syntax/scatter_linked_brush.py ================================================ """ Multi-panel Scatter Plot with Linked Brushing --------------------------------------------- This is an example of using an interval selection to control the color of points across multiple panels. """ # category: interactive charts import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_interval(resolve='global') base = alt.Chart(source).mark_point().encode( y='Miles_per_Gallon', color=alt.when(brush).then("Origin").otherwise(alt.ColorValue("gray")), ).add_params( brush ).properties( width=250, height=250 ) base.encode(x='Horsepower') | base.encode(x='Acceleration') ================================================ FILE: tests/examples_arguments_syntax/scatter_linked_table.py ================================================ """ Brushing Scatter Plot to Show Data on a Table --------------------------------------------- A scatter plot of the cars dataset, with data tables for horsepower, MPG, and origin. The tables update to reflect the selection on the scatter plot. 
""" # category: scatter plots import altair as alt from altair.datasets import data source = data.cars() # Brush for selection brush = alt.selection_interval() # Scatter Plot points = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then(alt.value("steelblue")).otherwise(alt.value("grey")) ).add_params(brush) # Base chart for data tables ranked_text = alt.Chart(source).mark_text(align='right').encode( y=alt.Y('row_number:O', axis=None) ).transform_filter( brush ).transform_window( row_number='row_number()' ).transform_filter( alt.datum.row_number < 15 ) # Data Tables horsepower = ranked_text.encode(text='Horsepower:N').properties( title=alt.Title(text='Horsepower', align='right') ) mpg = ranked_text.encode(text='Miles_per_Gallon:N').properties( title=alt.Title(text='MPG', align='right') ) origin = ranked_text.encode(text='Origin:N').properties( title=alt.Title(text='Origin', align='right') ) text = alt.hconcat(horsepower, mpg, origin) # Combine data tables # Build chart alt.hconcat( points, text ).resolve_legend( color="independent" ).configure_view( stroke=None ) ================================================ FILE: tests/examples_arguments_syntax/scatter_marginal_hist.py ================================================ """ Scatter Plot with Faceted Marginal Histograms --------------------------------------------- This example demonstrates how to generate a scatter plot, with faceted marginal histograms that share their respective x- and y-limits. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.penguins() base = alt.Chart(source) base_bar = base.mark_bar(opacity=0.3, binSpacing=0) xscale = alt.Scale(domain=(170, 235)) yscale = alt.Scale(domain=(2500, 6500)) points = base.mark_circle().encode( alt.X("Flipper Length (mm)", scale=xscale), alt.Y("Body Mass (g)", scale=yscale), color="Species", ) top_hist = ( base_bar .encode( alt.X( "Flipper Length (mm):Q", # when using bins, the axis scale is set through # the bin extent, so we do not specify the scale here # (which would be ignored anyway) bin=alt.Bin(maxbins=20, extent=xscale.domain), stack=None, title="", ), alt.Y("count()", stack=None, title=""), alt.Color("Species:N"), ) .properties(height=60) ) right_hist = ( base_bar .encode( alt.Y( "Body Mass (g):Q", bin=alt.Bin(maxbins=20, extent=yscale.domain), stack=None, title="", ), alt.X("count()", stack=None, title=""), alt.Color("Species:N"), ) .properties(width=60) ) top_hist & (points | right_hist) ================================================ FILE: tests/examples_arguments_syntax/scatter_matrix.py ================================================ """ Scatter Matrix -------------- An example of using a RepeatChart to construct a multi-panel scatter plot with linked panning and zooming. 
""" # category: scatter plots import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_circle().encode( alt.X(alt.repeat("column"), type='quantitative'), alt.Y(alt.repeat("row"), type='quantitative'), color='Origin:N' ).properties( width=150, height=150 ).repeat( row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'], column=['Miles_per_Gallon', 'Acceleration', 'Horsepower'] ).interactive() ================================================ FILE: tests/examples_arguments_syntax/scatter_point_paths_hover.py ================================================ """ Scatter plot with point paths on hover with search box ====================================================== This example combines cross-sectional analysis (comparing countries at a single point in time) with longitudinal analysis (tracking changes in individual countries over time), using an interactive visualization technique inspired by [this Vega example](https://vega.github.io/vega/examples/global-development/). Key features: 1. Point Paths. On hover, shows data trajectories using a trail mark that thickens from past to present, clearly indicating the direction of time. 2. Search Box. Implements a case-insensitive regex filter for country names, enabling dynamic, flexible data point selection to enhance exploratory analysis. 
""" # category: interactive charts import altair as alt from altair.datasets import data # Data source source = data.gapminder.url # X-value slider x_slider = alt.binding_range(min=1955, max=2005, step=5, name='Year ') x_select = alt.selection_point(name="x_select", fields=['year'], bind=x_slider, value=1980) # Hover selection hover = alt.selection_point(on='mouseover', fields=['country'], empty=False) # A separate hover for the points since these need empty=True hover_point_opacity = alt.selection_point(on='mouseover', fields=['country']) # Search box for country name search_box = alt.param( value='', bind=alt.binding(input='search', placeholder="Country", name='Search ') ) # Base chart base = alt.Chart(source).encode( x=alt.X('fertility:Q', scale=alt.Scale(zero=False), title='Babies per woman (total fertility rate)'), y=alt.Y('life_expect:Q', scale=alt.Scale(zero=False), title='Life expectancy'), color=alt.Color('region:N', title='Region', legend=alt.Legend(orient='bottom-left', titleFontSize=14, labelFontSize=12), scale=alt.Scale(scheme='dark2')), detail='country:N' ).transform_calculate( region="""{ '0': 'South Asia', '1': 'Europe & Central Asia', '2': 'Sub-Saharan Africa', '3': 'The Americas', '4': 'East Asia & Pacific', '5': 'Middle East & North Africa' }[datum.cluster]""" ).transform_filter( # Exclude North Korea and South Korea due to source data error "datum.country !== 'North Korea' && datum.country !== 'South Korea'" ) search_matches = alt.expr.test(alt.expr.regexp(search_box, "i"), alt.datum.country) opacity = ( alt.when(hover_point_opacity, search_matches) .then(alt.value(0.8)) .otherwise(alt.value(0.1)) ) # Points that are always visible (filtered by slider and search) visible_points = base.mark_circle(size=100).encode( opacity=opacity ).transform_filter( x_select ).add_params( hover, hover_point_opacity, x_select ) when_hover = alt.when(hover) hover_line = alt.layer( # Line layer base.mark_trail().encode( order=alt.Order( 'year:Q', sort='ascending' 
), size=alt.Size( 'year:Q', scale=alt.Scale(domain=[1955, 2005], range=[1, 12]), legend=None ), opacity=when_hover.then(alt.value(0.3)).otherwise(alt.value(0)), color=alt.value('#222222') ), # Point layer base.mark_point(size=50).encode( opacity=when_hover.then(alt.value(0.8)).otherwise(alt.value(0)), ) ) # Year labels year_labels = base.mark_text(align='left', dx=5, dy=-5, fontSize=14).encode( text='year:O', color=alt.value('#222222') ).transform_filter(hover) # Country labels country_labels = alt.Chart(source).mark_text( align='left', dx=-15, dy=-25, fontSize=18, fontWeight='bold' ).encode( x='fertility:Q', y='life_expect:Q', text='country:N', color=alt.value('black'), opacity=when_hover.then(alt.value(1)).otherwise(alt.value(0)) ).transform_window( rank='rank(life_expect)', sort=[alt.SortField('life_expect', order='descending')], groupby=['country'] # places label atop highest point on y-axis on hover ).transform_filter( alt.datum.rank == 1 ).transform_aggregate( life_expect='max(life_expect)', fertility='max(fertility)', groupby=['country'] ) background_year = alt.Chart(source).mark_text( baseline='middle', fontSize=96, opacity=0.2 ).encode( text='year:O' ).transform_filter( x_select ).transform_aggregate( year='max(year)' ) # Combine all layers chart = alt.layer( visible_points, year_labels, country_labels, hover_line, background_year ).properties( width=500, height=500, padding=10 # Padding ensures labels fit ).configure_axis( labelFontSize=12, titleFontSize=12 ).add_params(search_box) chart ================================================ FILE: tests/examples_arguments_syntax/scatter_qq.py ================================================ """ Quantile-Quantile Plot ---------------------- A quantile-quantile plot comparing input data to theoretical distributions. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.normal_2d.url base = alt.Chart(source).transform_quantile( 'u', step=0.01, as_ = ['p', 'v'] ).transform_calculate( uniform = 'quantileUniform(datum.p)', normal = 'quantileNormal(datum.p)' ).mark_point().encode( alt.Y('v:Q') ) base.encode(x='uniform:Q') | base.encode(x='normal:Q') ================================================ FILE: tests/examples_arguments_syntax/scatter_tooltips.py ================================================ """ Simple Scatter Plot with Tooltips --------------------------------- A scatter plot of the cars dataset, with tooltips showing selected column values when you hover over points. We make the points larger so that it is easier to hover over them. """ # category: simple charts import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_circle(size=60).encode( x='Horsepower', y='Miles_per_Gallon', color='Origin', tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon'] ).interactive() ================================================ FILE: tests/examples_arguments_syntax/scatter_with_histogram.py ================================================ """ Scatter Plot and Histogram with Interval Selection ================================================== This example shows how to link a scatter plot and a histogram together such that an interval selection in the histogram will plot the selected values in the scatter plot. Note that both subplots need to know about the `mbin` field created by the `transform_bin` method. In order to achieve this, the data is not passed to the `Chart()` instances creating the subplots, but directly in the `hconcat()` function, which joins the two plots together. 
""" # category: interactive charts import altair as alt import pandas as pd import numpy as np x = np.random.normal(size=100) y = np.random.normal(size=100) m = np.random.normal(15, 1, size=100) source = pd.DataFrame({"x": x, "y":y, "m":m}) # interval selection in the scatter plot pts = alt.selection_interval(encodings=["x"]) # left panel: scatter plot points = alt.Chart().mark_point(filled=True, color="black").encode( x='x', y='y' ).transform_filter( pts ).properties( width=300, height=300 ) # right panel: histogram mag = alt.Chart().mark_bar().encode( x='mbin:N', y="count()", color=alt.when(pts).then(alt.value("black")).otherwise(alt.value("lightgray")) ).properties( width=300, height=300 ).add_params(pts) # build the chart: alt.hconcat( points, mag, data=source ).transform_bin( "mbin", field="m", bin=alt.Bin(maxbins=20) ) ================================================ FILE: tests/examples_arguments_syntax/scatter_with_labels.py ================================================ """ Simple Scatter Plot with Labels =============================== This example shows a basic scatter plot with labels created with Altair. """ # category: scatter plots import altair as alt import pandas as pd source = pd.DataFrame({ 'x': [1, 3, 5, 7, 9], 'y': [1, 3, 5, 7, 9], 'label': ['A', 'B', 'C', 'D', 'E'] }) points = alt.Chart(source).mark_point().encode( x='x:Q', y='y:Q' ) text = points.mark_text( align='left', baseline='middle', dx=7 ).encode( text='label' ) points + text ================================================ FILE: tests/examples_arguments_syntax/scatter_with_layered_histogram.py ================================================ """ Interactive Scatter Plot and Linked Layered Histogram ===================================================== This example shows how to link a scatter plot and a histogram together such that clicking on a point in the scatter plot will isolate the distribution corresponding to that point, and vice versa. 
""" # category: interactive charts import altair as alt import pandas as pd import numpy as np # generate fake data source = pd.DataFrame({ 'gender': ['M']*1000 + ['F']*1000, 'height':np.concatenate(( np.random.normal(69, 7, 1000), np.random.normal(64, 6, 1000) )), 'weight': np.concatenate(( np.random.normal(195.8, 144, 1000), np.random.normal(167, 100, 1000) )), 'age': np.concatenate(( np.random.normal(45, 8, 1000), np.random.normal(51, 6, 1000) )) }) selector = alt.selection_point(fields=['gender']) color_scale = alt.Scale(domain=['M', 'F'], range=['#1FC3AA', '#8624F5']) color = ( alt.when(selector) .then(alt.Color("gender:N", scale=color_scale)) .otherwise(alt.value("lightgray")) ) base = alt.Chart(source).properties( width=250, height=250 ).add_params(selector) points = base.mark_point(filled=True, size=200).encode( x=alt.X('mean(height):Q', scale=alt.Scale(domain=[0,84])), y=alt.Y('mean(weight):Q', scale=alt.Scale(domain=[0,250])), color=color, ) hists = base.mark_bar(opacity=0.5, thickness=100).encode( x=alt.X('age', bin=alt.Bin(step=5), # step keeps bin size the same scale=alt.Scale(domain=[0,100])), y=alt.Y('count()', stack=None, scale=alt.Scale(domain=[0,350])), color=alt.Color('gender:N', scale=color_scale) ).transform_filter( selector ) points | hists ================================================ FILE: tests/examples_arguments_syntax/scatter_with_loess.py ================================================ """ Scatter Plot with LOESS Lines ----------------------------- This example shows how to add a trend line to a scatter plot using the LOESS transform (LOcally Estimated Scatter Plot Smoothing). 
""" # category: uncertainties and trends import altair as alt import pandas as pd import numpy as np np.random.seed(1) source = pd.DataFrame({ 'x': np.arange(100), 'A': np.random.randn(100).cumsum(), 'B': np.random.randn(100).cumsum(), 'C': np.random.randn(100).cumsum(), }) base = alt.Chart(source).mark_circle(opacity=0.5).transform_fold( fold=['A', 'B', 'C'], as_=['category', 'y'] ).encode( alt.X('x:Q'), alt.Y('y:Q'), alt.Color('category:N') ) base + base.transform_loess('x', 'y', groupby=['category']).mark_line(size=4) ================================================ FILE: tests/examples_arguments_syntax/scatter_with_minimap.py ================================================ """ Scatter Plot with Minimap ------------------------- This example shows how to create a miniature version of a plot such that creating a selection in the miniature version adjusts the axis limits in another, more detailed view. """ # category: scatter plots import altair as alt from altair.datasets import data source = data.seattle_weather() zoom = alt.selection_interval(encodings=["x", "y"]) minimap = ( alt.Chart(source) .mark_point() .add_params(zoom) .encode( x="date:T", y="temp_max:Q", color=alt.when(zoom).then("weather").otherwise(alt.value("lightgray")), ) .properties( width=200, height=200, title="Minimap -- click and drag to zoom in the detail view", ) ) detail = ( alt.Chart(source) .mark_point() .encode( x=alt.X( "date:T", scale=alt.Scale(domain={"param": zoom.name, "encoding": "x"}) ), y=alt.Y( "temp_max:Q", scale=alt.Scale(domain={"param": zoom.name, "encoding": "y"}), ), color="weather", ) .properties(width=600, height=400, title="Seattle weather -- detail view") ) detail | minimap ================================================ FILE: tests/examples_arguments_syntax/scatter_with_rolling_mean.py ================================================ """ Scatter Plot with Rolling Mean ------------------------------ A scatter plot with a rolling mean overlay. 
In this example a 30 day window is used to calculate the mean of the maximum temperature around each date. """ # category: scatter plots import altair as alt from altair.datasets import data source = data.seattle_weather() line = alt.Chart(source).mark_line( color='red', size=3 ).transform_window( rolling_mean='mean(temp_max)', frame=[-15, 15] ).encode( x='date:T', y='rolling_mean:Q' ) points = alt.Chart(source).mark_point().encode( x='date:T', y=alt.Y('temp_max:Q', axis=alt.Axis(title='Max Temp')) ) points + line ================================================ FILE: tests/examples_arguments_syntax/scatter_with_shaded_area.py ================================================ """ Scatter Plot with Shaded Area ----------------------------- This example shows a scatter plot with shaded area, constructed using :ref:`area mark ` and :ref:`rect mark `. """ # category: scatter plots import altair as alt import pandas as pd import numpy as np data = pd.DataFrame({ "x": np.random.uniform(-4, 5, size=50), "y": np.random.uniform(2, 5, size=50), }) rect_data = pd.DataFrame({ "x1": [-2], "x2": [-1] }) # define this interval between y = x and y = x + 1 df = pd.DataFrame({ "x": range(7), "ymin": range(7), "ymax": range(1,8) }) points = alt.Chart(data).mark_point().encode( x="x", y="y" ) interval = alt.Chart(df).mark_area(opacity=0.3).encode( x="x:Q", y="ymin:Q", y2="ymax:Q" ) rect = alt.Chart(rect_data).mark_rect(opacity=0.3).encode( x="x1", x2="x2", color=alt.ColorValue("#FF0000") ) points + interval + rect ================================================ FILE: tests/examples_arguments_syntax/seattle_weather_interactive.py ================================================ """ Seattle Weather Interactive =========================== This chart provides an interactive exploration of Seattle weather over the course of the year. It includes a one-axis brush selection to easily see the distribution of weather types in a particular date range. 
""" # category: case studies import altair as alt from altair.datasets import data source = data.seattle_weather() scale = alt.Scale(domain=['sun', 'fog', 'drizzle', 'rain', 'snow'], range=['#e7ba52', '#a7a7a7', '#aec7e8', '#1f77b4', '#9467bd']) color = alt.Color('weather:N', scale=scale) # We create two selections: # - a brush that is active on the top panel # - a multi-click that is active on the bottom panel brush = alt.selection_interval(encodings=['x']) click = alt.selection_point(encodings=['color']) # Top panel is scatter plot of temperature vs time points = alt.Chart().mark_point().encode( alt.X('monthdate(date):T', title='Date'), alt.Y('temp_max:Q', title='Maximum Daily Temperature (C)', scale=alt.Scale(domain=[-5, 40]) ), color=alt.when(brush).then(color).otherwise(alt.value("lightgray")), size=alt.Size('precipitation:Q', scale=alt.Scale(range=[5, 200])) ).properties( width=550, height=300 ).add_params( brush ).transform_filter( click ) # Bottom panel is a bar chart of weather type bars = alt.Chart().mark_bar().encode( x='count()', y='weather:N', color=alt.when(click).then(color).otherwise(alt.value("lightgray")), ).transform_filter( brush ).properties( width=550, ).add_params( click ) alt.vconcat( points, bars, data=source, title="Seattle Weather: 2012-2015" ) ================================================ FILE: tests/examples_arguments_syntax/select_detail.py ================================================ """ Selection Detail ================ This example shows a selection that links two views of data: the left panel contains one point per object, and the right panel contains one line per object. Clicking on either the points or lines will select the corresponding objects in both views of the data. The challenge lies in expressing such hierarchical data in a way that Altair can handle. We do this by merging the data into a "long form" dataframe, and aggregating identical metadata for the final plot. 
""" # category: interactive charts import altair as alt import pandas as pd import numpy as np np.random.seed(0) n_objects = 20 n_times = 50 # Create one (x, y) pair of metadata per object locations = pd.DataFrame({ 'id': range(n_objects), 'x': np.random.randn(n_objects), 'y': np.random.randn(n_objects) }) # Create a 50-element time-series for each object timeseries = pd.DataFrame(np.random.randn(n_times, n_objects).cumsum(0), columns=locations['id'], index=pd.RangeIndex(0, n_times, name='time')) # Melt the wide-form timeseries into a long-form view timeseries = timeseries.reset_index().melt('time') # Merge the (x, y) metadata into the long-form view timeseries['id'] = timeseries['id'].astype(int) # make merge not complain data = pd.merge(timeseries, locations, on='id') # Data is prepared, now make a chart selector = alt.selection_point(fields=['id']) color = ( alt.when(selector) .then(alt.Color("id:O", legend=None)) .otherwise(alt.value("lightgray")) ) base = alt.Chart(data).properties( width=250, height=250 ).add_params(selector) points = base.mark_point(filled=True, size=200).encode( x='mean(x)', y='mean(y)', color=color, ) line = base.mark_line().encode( x='time', y=alt.Y('value', scale=alt.Scale(domain=(-15, 15))), color=alt.Color('id:O', legend=None) ).transform_filter( selector ) points | line ================================================ FILE: tests/examples_arguments_syntax/select_mark_area.py ================================================ """ Using Selection Interval with mark_area ========================================= Because area is considered one object, just using the plain selector will select the entire area instead of just one part of it. This example shows how to use two areas, one on top of the other, and a `transform_filter` to fake out this effect. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url base = alt.Chart(source).mark_area( color='goldenrod', opacity=0.3 ).encode( x='yearmonth(date):T', y='sum(count):Q', ) brush = alt.selection_interval(encodings=['x']) background = base.add_params(brush) selected = base.transform_filter(brush).mark_area(color='goldenrod') background + selected ================================================ FILE: tests/examples_arguments_syntax/selection_histogram.py ================================================ """ Selection Histogram =================== This chart shows an example of using an interval selection to filter the contents of an attached histogram, allowing the user to see the proportion of items in each category within the selection. """ # category: interactive charts import altair as alt from altair.datasets import data source = data.cars() brush = alt.selection_interval() points = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then("Origin:N").otherwise(alt.value("lightgray")) ).add_params( brush ) bars = alt.Chart(source).mark_bar().encode( y='Origin:N', color='Origin:N', x='count(Origin):Q' ).transform_filter( brush ) points & bars ================================================ FILE: tests/examples_arguments_syntax/selection_layer_bar_month.py ================================================ """ Interactive Average =================== The plot below uses an interval selection, which causes the chart to include an interactive brush (shown in grey). The brush selection parameterizes the red guideline, which visualizes the average value within the selected interval. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.seattle_weather() brush = alt.selection_interval(encodings=['x']) bars = alt.Chart().mark_bar().encode( x='month(date):O', y='mean(precipitation):Q', opacity = alt.when(brush).then(alt.value(1)).otherwise(alt.value(0.7)), ).add_params( brush ) line = alt.Chart().mark_rule(color='firebrick').encode( y='mean(precipitation):Q', size=alt.SizeValue(3) ).transform_filter( brush ) alt.layer(bars, line, data=source) ================================================ FILE: tests/examples_arguments_syntax/selection_zorder.py ================================================ """ Selection zorder ================ This example shows how to bring selected points to the front/foreground by using a condition to change the point's (z)order as it is hovered over with the pointer. This prevents that the selected points are obscured by those that are not selected. """ # category: interactive charts import altair as alt from altair.datasets import data cars = data.cars.url hover = alt.selection_point(on='pointerover', nearest=True, empty=False) when_hover = alt.when(hover) chart = alt.Chart(cars, title='Selection obscured by other points').mark_circle(opacity=1).encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=when_hover.then(alt.value("coral")).otherwise(alt.value("lightgray")), size=when_hover.then(alt.value(300)).otherwise(alt.value(30)) ).add_params( hover ) chart | chart.encode( order=when_hover.then(alt.value(1)).otherwise(alt.value(0)) ).properties( title='Selection brought to front' ) ================================================ FILE: tests/examples_arguments_syntax/simple_bar_chart.py ================================================ """ Simple Bar Chart ================ This example shows a basic bar chart created with Altair. 
""" # category: simple charts import altair as alt import pandas as pd source = pd.DataFrame({ 'a': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'], 'b': [28, 55, 43, 91, 81, 53, 19, 87, 52] }) alt.Chart(source).mark_bar().encode( x='a', y='b' ) ================================================ FILE: tests/examples_arguments_syntax/simple_heatmap.py ================================================ """ Simple Heatmap -------------- This example shows a simple heatmap for showing gridded data. """ # category: simple charts import altair as alt import numpy as np import pandas as pd # Compute x^2 + y^2 across a 2D grid x, y = np.meshgrid(range(-5, 5), range(-5, 5)) z = x ** 2 + y ** 2 # Convert this grid to columnar data expected by Altair source = pd.DataFrame({'x': x.ravel(), 'y': y.ravel(), 'z': z.ravel()}) alt.Chart(source).mark_rect().encode( x='x:O', y='y:O', color='z:Q' ) ================================================ FILE: tests/examples_arguments_syntax/simple_histogram.py ================================================ """ Simple Histogram ---------------- This example shows how to make a basic histogram, based on the vega-lite docs https://vega.github.io/vega-lite/examples/histogram.html """ # category: simple charts import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_bar().encode( alt.X("IMDB Rating:Q", bin=True), y='count()', ) ================================================ FILE: tests/examples_arguments_syntax/simple_line_chart.py ================================================ """ Simple Line Chart ----------------- This chart shows the most basic line chart, made from a dataframe with two columns. 
""" # category: simple charts import altair as alt import numpy as np import pandas as pd x = np.arange(100) source = pd.DataFrame({ 'x': x, 'f(x)': np.sin(x / 5) }) alt.Chart(source).mark_line().encode( x='x', y='f(x)' ) ================================================ FILE: tests/examples_arguments_syntax/simple_scatter_with_errorbars.py ================================================ """ Simple Scatter Plot with Errorbars ---------------------------------- A simple scatter plot of a data set with errorbars. """ # category: uncertainties and trends import altair as alt import pandas as pd import numpy as np # generate some data points with uncertainties np.random.seed(0) x = [1, 2, 3, 4, 5] y = np.random.normal(10, 0.5, size=len(x)) yerr = 0.2 # set up data frame source = pd.DataFrame({"x": x, "y": y, "yerr": yerr}) # the base chart base = alt.Chart(source).transform_calculate( ymin="datum.y-datum.yerr", ymax="datum.y+datum.yerr" ) # generate the points points = base.mark_point( filled=True, size=50, color='black' ).encode( x=alt.X('x', scale=alt.Scale(domain=(0, 6))), y=alt.Y('y', scale=alt.Scale(zero=False)) ) # generate the error bars errorbars = base.mark_errorbar().encode( x="x", y="ymin:Q", y2="ymax:Q" ) points + errorbars ================================================ FILE: tests/examples_arguments_syntax/simple_stacked_area_chart.py ================================================ """ Simple Stacked Area Chart ------------------------- This example shows how to make a simple stacked area chart. 
""" # category: simple charts import altair as alt from altair.datasets import data source = data.iowa_electricity() alt.Chart(source).mark_area().encode( x="year:T", y="net_generation:Q", color="source:N" ) ================================================ FILE: tests/examples_arguments_syntax/slider_cutoff.py ================================================ """ Slider Cutoff ============= This example shows how to bind a variable parameter to a slider, and how to use the corresponding bound value to color data points. This example is based on an example from the Altair 4 documentation for Interactions, in which the interactivity was accomplished using a selection. The version below has been simplified significantly through the use of a variable parameter. Variable parameters were added in Altair 5. """ # category: interactive charts import altair as alt import pandas as pd import numpy as np rand = np.random.RandomState(42) df = pd.DataFrame({ 'xval': range(100), 'yval': rand.randn(100).cumsum() }) slider = alt.binding_range(min=0, max=100, step=1) cutoff = alt.param(bind=slider, value=50) predicate = alt.datum.xval < cutoff alt.Chart(df).mark_point().encode( x='xval', y='yval', color=alt.when(predicate).then(alt.value("red")).otherwise(alt.value("blue")), ).add_params( cutoff ) ================================================ FILE: tests/examples_arguments_syntax/slope_graph.py ================================================ """ Slope Graph ----------------------- This example shows how to make Slope Graph. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_line().encode( x='year:O', y='median(yield)', color='site' ) ================================================ FILE: tests/examples_arguments_syntax/sorted_error_bars_with_ci.py ================================================ """ Sorted Error Bars showing Confidence Interval ============================================= This example shows how to show error bars using confidence intervals, while also sorting the y-axis based on x-axis values. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.barley() points = alt.Chart(source).mark_point( filled=True, color='black' ).encode( x=alt.X('mean(yield)', title='Barley Yield'), y=alt.Y( 'variety', sort=alt.EncodingSortField( field='yield', op='mean', order='descending' ) ) ).properties( width=400, height=250 ) error_bars = points.mark_rule().encode( x='ci0(yield)', x2='ci1(yield)', ) points + error_bars ================================================ FILE: tests/examples_arguments_syntax/stacked_bar_chart.py ================================================ """ Stacked Bar Chart ----------------- This is an example of a stacked bar chart using data which contains crop yields over different regions and different years in the 1930s. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x='variety', y='sum(yield)', color='site' ) ================================================ FILE: tests/examples_arguments_syntax/stacked_bar_chart_sorted_segments.py ================================================ """ Stacked Bar Chart with Sorted Segments -------------------------------------- This is an example of a stacked-bar chart with the segments of each bar resorted. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x='sum(yield)', y='variety', color='site', order=alt.Order( # Sort the segments of the bars by this field 'site', sort='ascending' ) ) ================================================ FILE: tests/examples_arguments_syntax/stacked_bar_chart_with_text.py ================================================ """ Stacked Bar Chart with Text Overlay =================================== This example shows how to overlay text on a stacked bar chart. For both the bar and text marks, we use the ``stack`` argument in the ``x`` encoding to cause the values to be stacked horizontally. """ # category: bar charts import altair as alt from altair.datasets import data source=data.barley() bars = alt.Chart(source).mark_bar().encode( x=alt.X('sum(yield):Q', stack='zero'), y=alt.Y('variety:N'), color=alt.Color('site') ) text = alt.Chart(source).mark_text(dx=-15, dy=3, color='white').encode( x=alt.X('sum(yield):Q', stack='zero'), y=alt.Y('variety:N'), detail='site:N', text=alt.Text('sum(yield):Q', format='.1f') ) bars + text ================================================ FILE: tests/examples_arguments_syntax/stem_and_leaf.py ================================================ """ Stem and Leaf Plot ------------------ This example shows how to make a stem and leaf plot. 
""" # category: advanced calculations import altair as alt import pandas as pd import numpy as np np.random.seed(42) # Generating random data source = pd.DataFrame({'samples': np.random.normal(50, 15, 100).astype(int).astype(str)}) # Splitting stem and leaf source['stem'] = source['samples'].str[:-1] source['leaf'] = source['samples'].str[-1] source = source.sort_values(by=['stem', 'leaf']) # Determining leaf position source['position'] = source.groupby('stem').cumcount().add(1) # Creating stem and leaf plot alt.Chart(source).mark_text( align='left', baseline='middle', dx=-5 ).encode( alt.X('position:Q', title='', axis=alt.Axis(ticks=False, labels=False, grid=False) ), alt.Y('stem:N', title='', axis=alt.Axis(tickSize=0)), text='leaf:N', ).configure_axis( labelFontSize=20 ).configure_text( fontSize=20 ) ================================================ FILE: tests/examples_arguments_syntax/step_chart.py ================================================ """ Step Chart ---------- This example shows Google's stock price over time. This uses the "step-after" interpolation scheme. The full list of interpolation options includes 'linear', 'linear-closed', 'step', 'step-before', 'step-after', 'basis', 'basis-open', 'basis-closed', 'cardinal', 'cardinal-open', 'cardinal-closed', 'bundle', and 'monotone'. """ # category: line charts import altair as alt from altair.datasets import data source = data.stocks() alt.Chart(source).mark_line(interpolate='step-after').encode( x='date', y='price' ).transform_filter( alt.datum.symbol == 'GOOG' ) ================================================ FILE: tests/examples_arguments_syntax/streamgraph.py ================================================ """ Streamgraph ----------------- This example shows the streamgraph from vega-lite examples. 
""" # category: area charts import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url alt.Chart(source).mark_area().encode( alt.X('yearmonth(date):T', axis=alt.Axis(format='%Y', domain=False, tickSize=0) ), alt.Y('sum(count):Q', stack='center', axis=None), alt.Color('series:N', scale=alt.Scale(scheme='category20b') ) ).interactive() ================================================ FILE: tests/examples_arguments_syntax/strip_plot.py ================================================ """ Simple Strip Plot ----------------- A simple example of how to make a strip plot. """ # category: simple charts import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_tick().encode( x='Horsepower:Q', y='Cylinders:O' ) ================================================ FILE: tests/examples_arguments_syntax/strip_plot_jitter.py ================================================ """ Strip Plot with Jitter ---------------------- In this chart, we encode the ``Major_Genre`` column from the ``movies`` dataset in the ``y``-channel. In the default presentation of this data, it would be difficult to gauge the relative frequencies with which different values occur because there would be so much overlap. To address this, we use the ``yOffset`` channel to incorporate a random offset (jittering). The example is shown twice, on the left side using normally distributed and on the right side using uniformally distributed jitter. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.movies.url gaussian_jitter = alt.Chart(source, title='Normally distributed jitter').mark_circle(size=8).encode( y="Major Genre:N", x="IMDB Rating:Q", yOffset="jitter:Q", color=alt.Color('Major Genre:N', legend=None) ).transform_calculate( # Generate Gaussian jitter with a Box-Muller transform jitter="sqrt(-2*log(random()))*cos(2*PI*random())" ) uniform_jitter = gaussian_jitter.transform_calculate( # Generate uniform jitter jitter='random()' ).encode( y=alt.Y('Major Genre:N', axis=None) ).properties( title='Uniformly distributed jitter' ) (gaussian_jitter | uniform_jitter).resolve_scale(yOffset='independent') ================================================ FILE: tests/examples_arguments_syntax/table_bubble_plot_github.py ================================================ """ Table Bubble Plot (Github Punch Card) ------------------------------------- This example shows github contributions by the day of week and hour of the day. """ # category: distributions import altair as alt from altair.datasets import data source = data.github.url alt.Chart(source).mark_circle().encode( x='hours(time):O', y='day(time):O', size='sum(count):Q' ) ================================================ FILE: tests/examples_arguments_syntax/top_k_items.py ================================================ """ Top K Items ----------- This example shows how to use the window and transformation filter to display the Top items of a long list of items in decreasing order. Here we sort the top 10 highest ranking movies of IMDB. 
""" # category: advanced calculations import altair as alt from altair.datasets import data source = data.movies.url # Top 10 movies by IMBD rating alt.Chart( source, ).mark_bar().encode( x=alt.X('Title:N', sort='-y'), y=alt.Y('IMDB Rating:Q'), color=alt.Color('IMDB Rating:Q') ).transform_window( rank='rank(IMDB Rating)', sort=[alt.SortField('IMDB Rating', order='descending')] ).transform_filter( (alt.datum.rank < 10) ) ================================================ FILE: tests/examples_arguments_syntax/top_k_letters.py ================================================ """ Top K Letters ------------- This example shows how to use a window transform in order to display only the top K categories by number of entries. In this case, we rank the characters in the first paragraph of Dickens' *A Tale of Two Cities* by number of occurrences. """ # category: advanced calculations import altair as alt import pandas as pd import numpy as np # Excerpt from A Tale of Two Cities; public domain text text = """ It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only. 
""" source = pd.DataFrame( {'letters': np.array([c for c in text if c.isalpha()])} ) alt.Chart(source).transform_aggregate( count='count()', groupby=['letters'] ).transform_window( rank='rank(count)', sort=[alt.SortField('count', order='descending')] ).transform_filter( alt.datum.rank < 10 ).mark_bar().encode( y=alt.Y('letters:N', sort='-x'), x='count:Q', ) ================================================ FILE: tests/examples_arguments_syntax/top_k_with_others.py ================================================ """ Top-K Plot with Others ---------------------- This example shows how to use aggregate, window, and calculate transformations to display the top-k directors by average worldwide gross while grouping the remaining directors as 'All Others'. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_bar().encode( x=alt.X("aggregate_gross:Q", aggregate="mean", title=None), y=alt.Y( "ranked_director:N", sort=alt.Sort(op="mean", field="aggregate_gross", order="descending"), title=None, ), ).transform_aggregate( aggregate_gross='mean(Worldwide Gross)', groupby=["Director"], ).transform_window( rank='row_number()', sort=[alt.SortField("aggregate_gross", order="descending")], ).transform_calculate( ranked_director="datum.rank < 10 ? datum.Director : 'All Others'" ).properties( title="Top Directors by Average Worldwide Gross", ) ================================================ FILE: tests/examples_arguments_syntax/trail_marker.py ================================================ """ Line Chart with Varying Size ---------------------------- This is example of using the ``trail`` marker to vary the size of a line. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.wheat() alt.Chart(source).mark_trail().encode( x='year:T', y='wheat:Q', size='wheat:Q' ) ================================================ FILE: tests/examples_arguments_syntax/us_employment.py ================================================ """ The U.S. Employment Crash During the Great Recession ---------------------------------------------------- This example is a fully developed bar chart with negative values using the sample dataset of U.S. employment changes during the Great Recession. """ # category: case studies import altair as alt import pandas as pd from altair.datasets import data source = data.us_employment() presidents = pd.DataFrame([ { "start": "2006-01-01", "end": "2009-01-19", "president": "Bush" }, { "start": "2009-01-20", "end": "2015-12-31", "president": "Obama" } ]) predicate = alt.datum.nonfarm_change > 0 bars = alt.Chart( source, title="The U.S. employment crash during the Great Recession" ).mark_bar().encode( x=alt.X("month:T", title=""), y=alt.Y("nonfarm_change:Q", title="Change in non-farm employment (in thousands)"), color=alt.when(predicate).then(alt.value("steelblue")).otherwise(alt.value("orange")), ) rule = alt.Chart(presidents).mark_rule( color="black", strokeWidth=2 ).encode( x='end:T' ).transform_filter(alt.datum.president == "Bush") text = alt.Chart(presidents).mark_text( align='left', baseline='middle', dx=7, dy=-135, size=11 ).encode( x='start:T', text='president', color=alt.value('#000000') ) (bars + rule + text).properties(width=600) ================================================ FILE: tests/examples_arguments_syntax/us_incomebrackets_by_state_facet.py ================================================ """ US Income by State: Wrapped Facet --------------------------------- This example shows how to create a map of income in the US by state, faceted over income brackets """ # category: maps import altair as alt from altair.datasets 
import data states = alt.topo_feature(data.us_10m.url, 'states') source = data.income.url alt.Chart(source).mark_geoshape().encode( shape='geo:G', color='pct:Q', tooltip=['name:N', 'pct:Q'], facet=alt.Facet('group:N', columns=2), ).transform_lookup( lookup='id', from_=alt.LookupData(data=states, key='id'), as_='geo' ).properties( width=300, height=175, ).project( type='albersUsa' ) ================================================ FILE: tests/examples_arguments_syntax/us_population_over_time.py ================================================ """ US Population by Age and Sex ============================ This chart visualizes the age distribution of the US population over time. It uses a slider widget that is bound to the year to visualize the age distribution over time. """ # category: case studies import altair as alt from altair.datasets import data source = data.population.url select_year = alt.selection_point( name="Year", fields=["year"], bind=alt.binding_range(min=1900, max=2000, step=10, name="Year"), value=2000, ) alt.Chart(source).mark_bar().encode( x=alt.X("sex:N", axis=alt.Axis(labels=False, title=None, ticks=False)), y=alt.Y("people:Q", scale=alt.Scale(domain=(0, 12000000)), title="Population"), color=alt.Color( "sex:N", scale=alt.Scale(domain=("Male", "Female"), range=["steelblue", "salmon"]), title="Sex", ), column=alt.Column("age:O", title="Age"), ).properties( width=20, title="U.S. Population by Age and Sex" ).add_params( select_year ).transform_calculate( "sex", alt.expr.if_(alt.datum.sex == 1, "Male", "Female") ).transform_filter( select_year ).configure_facet( spacing=8 ) ================================================ FILE: tests/examples_arguments_syntax/us_population_over_time_facet.py ================================================ """ US Population: Wrapped Facet ============================ This chart visualizes the age distribution of the US population over time, using a wrapped faceting of the data by decade. 
""" # category: case studies import altair as alt from altair.datasets import data source = data.population.url alt.Chart(source).mark_area().encode( x='age:O', y=alt.Y( 'sum(people):Q', title='Population', axis=alt.Axis(format='~s') ), facet=alt.Facet('year:O', columns=5), ).properties( title='US Age Distribution By Year', width=90, height=80 ) ================================================ FILE: tests/examples_arguments_syntax/us_population_pyramid_over_time.py ================================================ ''' US Population Pyramid Over Time =============================== A population pyramid shows the distribution of age groups within a population. It uses a slider widget that is bound to the year to visualize the age distribution over time. ''' # category: case studies import altair as alt from altair.datasets import data source = data.population.url slider = alt.binding_range(min=1850, max=2000, step=10) select_year = alt.selection_point(name='year', fields=['year'], bind=slider, value=2000) base = alt.Chart(source).add_params( select_year ).transform_filter( select_year ).transform_calculate( gender=alt.expr.if_(alt.datum.sex == 1, 'Male', 'Female') ).properties( width=250 ) color_scale = alt.Scale(domain=['Male', 'Female'], range=['#1f77b4', '#e377c2']) left = base.transform_filter( alt.datum.gender == 'Female' ).encode( y=alt.Y('age:O', axis=None), x=alt.X('sum(people):Q', title='population', sort=alt.SortOrder('descending')), color=alt.Color('gender:N', scale=color_scale, legend=None) ).mark_bar().properties(title='Female') middle = base.encode( y=alt.Y('age:O', axis=None), text=alt.Text('age:Q'), ).mark_text().properties(width=20) right = base.transform_filter( alt.datum.gender == 'Male' ).encode( y=alt.Y('age:O', axis=None), x=alt.X('sum(people):Q', title='population'), color=alt.Color('gender:N', scale=color_scale, legend=None) ).mark_bar().properties(title='Male') alt.concat(left, middle, right, spacing=5) 
================================================ FILE: tests/examples_arguments_syntax/us_state_capitals.py ================================================ """ U.S. State Capitals Overlaid on a Map of the U.S ------------------------------------------------- This is a layered geographic visualization that shows US capitals overlaid on a map. """ # category: case studies import altair as alt from altair.datasets import data states = alt.topo_feature(data.us_10m.url, 'states') capitals = data.us_state_capitals.url # US states background background = alt.Chart(states).mark_geoshape( fill='lightgray', stroke='white' ).properties( title='US State Capitols', width=650, height=400 ).project('albersUsa') # Points and text hover = alt.selection_point(on='pointerover', nearest=True, fields=['lat', 'lon']) base = alt.Chart(capitals).encode( longitude='lon:Q', latitude='lat:Q', ) text = base.mark_text(dy=-5, align='right').encode( alt.Text('city', type='nominal'), opacity=alt.when(~hover).then(alt.value(0)).otherwise(alt.value(1)) ) points = base.mark_point().encode( color=alt.value('black'), size=alt.when(~hover).then(alt.value(30)).otherwise(alt.value(100)) ).add_params(hover) background + points + text ================================================ FILE: tests/examples_arguments_syntax/violin_plot.py ================================================ """ Violin Plot ----------- This example shows how to make a Violin Plot using Altair's density transform. 
""" # category: distributions import altair as alt from altair.datasets import data alt.Chart(data.cars()).transform_density( 'Miles_per_Gallon', as_=['Miles_per_Gallon', 'density'], extent=[5, 50], groupby=['Origin'] ).mark_area(orient='horizontal').encode( y='Miles_per_Gallon:Q', color='Origin:N', x=alt.X( 'density:Q', stack='center', impute=None, title=None, axis=alt.Axis(labels=False, values=[0],grid=False, ticks=True), ), column=alt.Column( 'Origin:N', header=alt.Header( titleOrient='bottom', labelOrient='bottom', labelPadding=0, ), ) ).properties( width=100 ).configure_facet( spacing=0 ).configure_view( stroke=None ) ================================================ FILE: tests/examples_arguments_syntax/waterfall_chart.py ================================================ """ Waterfall Chart --------------- This example shows how to recreate a Vega-Lite implementation of a waterfall chart. Original inspiration is from https://vega.github.io/vega-lite/examples/waterfall_chart.html """ # category: advanced calculations import altair as alt import pandas as pd data = [ {"label": "Begin", "amount": 4000}, {"label": "Jan", "amount": 1707}, {"label": "Feb", "amount": -1425}, {"label": "Mar", "amount": -1030}, {"label": "Apr", "amount": 1812}, {"label": "May", "amount": -1067}, {"label": "Jun", "amount": -1481}, {"label": "Jul", "amount": 1228}, {"label": "Aug", "amount": 1176}, {"label": "Sep", "amount": 1146}, {"label": "Oct", "amount": 1205}, {"label": "Nov", "amount": -1388}, {"label": "Dec", "amount": 1492}, {"label": "End", "amount": 0}, ] source = pd.DataFrame(data) # Define frequently referenced fields amount = alt.datum.amount label = alt.datum.label window_lead_label = alt.datum.window_lead_label window_sum_amount = alt.datum.window_sum_amount # Define frequently referenced/long expressions calc_prev_sum = alt.expr.if_(label == "End", 0, window_sum_amount - amount) calc_amount = alt.expr.if_(label == "End", window_sum_amount, amount) calc_text_amount = ( 
alt.expr.if_((label != "Begin") & (label != "End") & calc_amount > 0, "+", "") + calc_amount ) # The "base_chart" defines the transform_window, transform_calculate, and X axis base_chart = alt.Chart(source).transform_window( window_sum_amount="sum(amount)", window_lead_label="lead(label)", ).transform_calculate( calc_lead=alt.expr.if_((window_lead_label == None), label, window_lead_label), calc_prev_sum=calc_prev_sum, calc_amount=calc_amount, calc_text_amount=calc_text_amount, calc_center=(window_sum_amount + calc_prev_sum) / 2, calc_sum_dec=alt.expr.if_(window_sum_amount < calc_prev_sum, window_sum_amount, ""), calc_sum_inc=alt.expr.if_(window_sum_amount > calc_prev_sum, window_sum_amount, ""), ).encode( x=alt.X("label:O", axis=alt.Axis(title="Months", labelAngle=0), sort=None) ) color_coding = ( alt.when((label == "Begin") | (label == "End")) .then(alt.value("#878d96")) .when(calc_amount < 0) .then(alt.value("#fa4d56")) .otherwise(alt.value("#24a148")) ) bar = base_chart.mark_bar(size=45).encode( y=alt.Y("calc_prev_sum:Q", title="Amount"), y2=alt.Y2("window_sum_amount:Q"), color=color_coding, ) # The "rule" chart is for the horizontal lines that connect the bars rule = base_chart.mark_rule(xOffset=-22.5, x2Offset=22.5).encode( y="window_sum_amount:Q", x2="calc_lead", ) # Add values as text text_pos_values_top_of_bar = base_chart.mark_text(baseline="bottom", dy=-4).encode( text=alt.Text("calc_sum_inc:N"), y="calc_sum_inc:Q", ) text_neg_values_bot_of_bar = base_chart.mark_text(baseline="top", dy=4).encode( text=alt.Text("calc_sum_dec:N"), y="calc_sum_dec:Q", ) text_bar_values_mid_of_bar = base_chart.mark_text(baseline="middle").encode( text=alt.Text("calc_text_amount:N"), y="calc_center:Q", color=alt.value("white"), ) alt.layer( bar, rule, text_pos_values_top_of_bar, text_neg_values_bot_of_bar, text_bar_values_mid_of_bar ).properties( width=800, height=450 ) ================================================ FILE: tests/examples_arguments_syntax/wheat_wages.py 
================================================ """ Wheat and Wages --------------- A recreation of William Playfair's classic chart visualizing the price of wheat, the wages of a mechanic, and the reigning British monarch. This is a more polished version of the simpler chart in :ref:`gallery_bar_and_line_with_dual_axis`. """ # category: case studies import altair as alt import pandas as pd from altair.datasets import data base_wheat = alt.Chart(data.wheat.url).transform_calculate(year_end="+datum.year + 5") base_monarchs = alt.Chart(data.monarchs.url).transform_calculate( offset="((!datum.commonwealth && datum.index % 2) ? -1: 1) * 2 + 95", off2="((!datum.commonwealth && datum.index % 2) ? -1: 1) + 95", y="95", x="+datum.start + (+datum.end - +datum.start)/2", ) bars = base_wheat.mark_bar(fill="#aaa", stroke="#999").encode( x=alt.X("year:Q", bin="binned", axis=alt.Axis(format="d", tickCount=5)).scale( zero=False ), y=alt.Y("wheat:Q", axis=alt.Axis(zindex=1)), x2=alt.X2("year_end"), ) section_data = pd.DataFrame( [ {"year": 1600}, {"year": 1650}, {"year": 1700}, {"year": 1750}, {"year": 1800}, ] ) section_line = ( alt.Chart(section_data) .mark_rule(stroke="#000", strokeWidth=0.6, opacity=0.7) .encode(x=alt.X("year")) ) area = base_wheat.mark_area(color="#a4cedb", opacity=0.7).encode( x=alt.X("year:Q"), y=alt.Y("wages:Q") ) area_line_1 = area.mark_line(color="#000", opacity=0.7) area_line_2 = area.mark_line(yOffset=-2, color="#EE8182") top_bars = base_monarchs.mark_bar(stroke="#000").encode( x=alt.X("start:Q"), x2=alt.X2("end"), y=alt.Y("y:Q"), y2=alt.Y2("offset"), fill=alt.Fill( "commonwealth:N", legend=None, scale=alt.Scale(range=["black", "white"]) ), ) top_text = base_monarchs.mark_text(yOffset=14, fontSize=9, fontStyle="italic").encode( x=alt.X("x:Q"), y=alt.Y("off2:Q"), text=alt.Text("name:N") ) ( (bars + section_line + area + area_line_1 + area_line_2 + top_bars + top_text) .properties(width=900, height=400) .configure_axis(title=None, gridColor="white", 
gridOpacity=0.25, domain=False) .configure_view(stroke="transparent") ) ================================================ FILE: tests/examples_arguments_syntax/wilkinson-dot-plot.py ================================================ """ Wilkinson Dot Plot ------------------ An example of a `Wilkinson Dot Plot `_ """ # category: advanced calculations import altair as alt import pandas as pd source = pd.DataFrame( {"data":[1,1,1,1,1,1,1,1,1,1, 2,2,2, 3,3, 4,4,4,4,4,4] } ) alt.Chart(source).mark_circle(opacity=1).transform_window( id='rank()', groupby=['data'] ).encode( alt.X('data:O'), alt.Y('id:O', axis=None, sort='descending') ).properties(height=100) ================================================ FILE: tests/examples_arguments_syntax/wind_vector_map.py ================================================ """ Wind Vector Map --------------- An example showing a vector array map showing wind speed and direction using ``wedge`` as shape for ``mark_point`` and ``angle`` encoding for the wind direction. This is adapted from this corresponding Vega-Lite Example: `Wind Vector Map `_ with an added base map. 
""" # category: maps import altair as alt from altair.datasets import data df_wind = data.windvectors() data_world = alt.topo_feature(data.world_110m.url, "countries") wedge = ( alt.Chart(df_wind) .mark_point(shape="wedge", filled=True) .encode( latitude="latitude", longitude="longitude", color=alt.Color( "dir", scale=alt.Scale(domain=[0, 360], scheme="rainbow"), legend=None ), angle=alt.Angle("dir", scale=alt.Scale(domain=[0, 360], range=[180, 540])), size=alt.Size("speed", scale=alt.Scale(rangeMax=500)), ) .project("equalEarth") ) xmin, xmax, ymin, ymax = ( df_wind.longitude.min(), df_wind.longitude.max(), df_wind.latitude.min(), df_wind.latitude.max(), ) # extent as feature or featurecollection extent = { "type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[ [xmax, ymax], [xmax, ymin], [xmin, ymin], [xmin, ymax], [xmax, ymax]]] }, "properties": {} } # use fit combined with clip=True base = ( alt.Chart(data_world) .mark_geoshape(clip=True, fill="lightgray", stroke="black", strokeWidth=0.5) .project(type="equalEarth", fit=extent) ) base + wedge ================================================ FILE: tests/examples_arguments_syntax/window_rank.py ================================================ """ Window Rank Line Chart ---------------------- This example shows the Group F rankings in the 2018 World Cup after each matchday. A window transformation is used to rank each after each match day, sorting by points and difference. 
""" # category: line charts import altair as alt import pandas as pd source = pd.DataFrame( [ {"team": "Germany", "matchday": 1, "point": 0, "diff": -1}, {"team": "Germany", "matchday": 2, "point": 3, "diff": 0}, {"team": "Germany", "matchday": 3, "point": 3, "diff": -2}, {"team": "Mexico", "matchday": 1, "point": 3, "diff": 1}, {"team": "Mexico", "matchday": 2, "point": 6, "diff": 2}, {"team": "Mexico", "matchday": 3, "point": 6, "diff": -1}, {"team": "South Korea", "matchday": 1, "point": 0, "diff": -1}, {"team": "South Korea", "matchday": 2, "point": 0, "diff": -2}, {"team": "South Korea", "matchday": 3, "point": 3, "diff": 0}, {"team": "Sweden", "matchday": 1, "point": 3, "diff": 1}, {"team": "Sweden", "matchday": 2, "point": 3, "diff": 0}, {"team": "Sweden", "matchday": 3, "point": 6, "diff": 3}, ] ) color_scale = alt.Scale( domain=["Germany", "Mexico", "South Korea", "Sweden"], range=["#000000", "#127153", "#C91A3C", "#0C71AB"], ) alt.Chart(source).mark_line().encode( x="matchday:O", y="rank:O", color=alt.Color("team:N", scale=color_scale) ).transform_window( rank="rank()", sort=[ alt.SortField("point", order="descending"), alt.SortField("diff", order="descending"), ], groupby=["matchday"], ).properties(title="World Cup 2018: Group F Rankings") ================================================ FILE: tests/examples_arguments_syntax/world_map.py ================================================ """ World Map --------- This example shows how to create a world map using data generators for different background layers. 
""" # category: maps import altair as alt from altair.datasets import data # Data generators for the background sphere = alt.sphere() graticule = alt.graticule() # Source of land data source = alt.topo_feature(data.world_110m.url, 'countries') # Layering and configuring the components alt.layer( alt.Chart(sphere).mark_geoshape(fill='lightblue'), alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5), alt.Chart(source).mark_geoshape(fill='ForestGreen', stroke='black') ).project( 'naturalEarth1' ).properties(width=600, height=400).configure_view(stroke=None) ================================================ FILE: tests/examples_arguments_syntax/world_projections.py ================================================ """ World Projections ----------------- This example shows a map of the countries of the world. Use the dropdown menu to compare projections. For more details on the `project` arguments, see the API of `altair.Projection `_. """ # category: maps import altair as alt from altair.datasets import data source = alt.topo_feature(data.world_110m.url, 'countries') input_dropdown = alt.binding_select(options=[ "albers", "albersUsa", "azimuthalEqualArea", "azimuthalEquidistant", "conicEqualArea", "conicEquidistant", "equalEarth", "equirectangular", "gnomonic", "mercator", "naturalEarth1", "orthographic", "stereographic", "transverseMercator" ], name='Projection ') param_projection = alt.param(value="equalEarth", bind=input_dropdown) alt.Chart(source, width=500, height=300).mark_geoshape( fill='lightgray', stroke='gray' ).project( type=alt.expr(param_projection.name) ).add_params(param_projection) ================================================ FILE: tests/examples_methods_syntax/__init__.py ================================================ import os from typing import Set # Set of the names of examples that should have SVG static images. # This is for examples that VlConvert's PNG export does not support. 
SVG_EXAMPLES: Set[str] = {"isotype_emoji"} def iter_examples_methods_syntax(): """Iterate over the examples in this directory. Each item is a dict with the following keys: - "name" : the unique name of the example - "filename" : the full file path to the example """ examples_methods_syntax_dir = os.path.abspath(os.path.dirname(__file__)) for filename in os.listdir(examples_methods_syntax_dir): name, ext = os.path.splitext(filename) if name.startswith("_") or ext != ".py": continue yield { "name": name, "filename": os.path.join(examples_methods_syntax_dir, filename), "use_svg": name in SVG_EXAMPLES, } ================================================ FILE: tests/examples_methods_syntax/airport_connections.py ================================================ """ Connections Among U.S. Airports Interactive ------------------------------------------- This example shows all the connections between major U.S. airports. Lookup transformations are used to find the coordinates of each airport and connecting airports. Connections are displayed on pointerover via a single selection. 
"""
Connections Among U.S. Airports Interactive
-------------------------------------------
This example shows all the connections between major U.S. airports. Lookup transformations
are used to find the coordinates of each airport and connecting airports. Connections
are displayed on pointerover via a single selection.
"""
# category: case studies
import altair as alt
from altair.datasets import data

# Each of these datasets has more than 5,000 rows, so reference them by URL
airports_url = data.airports.url
routes_url = data.flights_airport.url
us_states = alt.topo_feature(data.us_10m.url, feature="states")

# Point selection driven by pointerover, keyed on the origin airport
hovered_origin = alt.selection_point(
    on="pointerover", nearest=True, fields=["origin"], empty=False
)

# Columns to pull from airports.csv, matched on the IATA code
airport_lookup = alt.LookupData(
    airports_url, key="iata", fields=["state", "latitude", "longitude"]
)

basemap = alt.Chart(us_states).mark_geoshape(fill="lightgray", stroke="white")
basemap = basemap.properties(width=750, height=500).project("albersUsa")

# One rule per route; both endpoints get their coordinates via lookups,
# and only routes matching the current selection are kept.
route_lines = alt.Chart(routes_url).mark_rule(opacity=0.35).encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q"
)
route_lines = route_lines.transform_lookup(lookup="origin", from_=airport_lookup)
route_lines = route_lines.transform_lookup(
    lookup="destination",
    from_=airport_lookup,
    as_=["state", "lat2", "lon2"]
)
route_lines = route_lines.transform_filter(hovered_origin)

# One circle per origin airport, sized by its number of routes
not_pr_or_vi = (alt.datum.state != "PR") & (alt.datum.state != "VI")
airport_points = alt.Chart(routes_url).mark_circle().encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    size=alt.Size("routes:Q").legend(None).scale(range=[0, 1000]),
    order=alt.Order("routes:Q").sort("descending"),
    tooltip=["origin:N", "routes:Q"]
)
airport_points = airport_points.transform_aggregate(
    routes="count()",
    groupby=["origin"]
)
airport_points = airport_points.transform_lookup(lookup="origin", from_=airport_lookup)
airport_points = airport_points.transform_filter(not_pr_or_vi)
airport_points = airport_points.add_params(hovered_origin)

(basemap + route_lines + airport_points).configure_view(stroke=None)
"""
Anscombe's Quartet
------------------

`Anscombe's Quartet `_
is a famous dataset constructed by Francis Anscombe.
It is made of 4 different subsets of data.
Each subset has very different characteristics, even though common summary
statistics such as mean and variance are identical.

This example shows how to make a faceted plot, with each facet
showing a different subset of the data.
"""
# category: case studies
import altair as alt
from altair.datasets import data

anscombe = data.anscombe()

# Neither position scale is forced to include zero.
x_position = alt.X("X").scale(zero=False)
y_position = alt.Y("Y").scale(zero=False)

# One panel per series, wrapped into two columns.
series_panels = alt.Facet("Series", columns=2)

scatter = alt.Chart(anscombe).mark_circle().encode(
    x_position,
    y_position,
    series_panels,
)

scatter.properties(width=180, height=180)
"""
Faceted Area Chart
------------------
Multiple area subcharts, one for each company.
We also show filtering out one of the companies, and sorting the companies in a custom order.
"""
# category: area charts
import altair as alt
from altair.datasets import data

stocks = data.stocks()

# Drop one company, then lay the remaining ones out in an explicit row order.
not_google = alt.datum.symbol != "GOOG"
company_rows = alt.Row("symbol:N").sort(["MSFT", "AAPL", "IBM", "AMZN"])

areas = alt.Chart(stocks).transform_filter(not_google).mark_area().encode(
    x="date:T",
    y="price:Q",
    color="symbol:N",
    row=company_rows,
)

areas.properties(height=50, width=400)
"""
Sorted Bar Chart
================
This example shows a bar chart sorted by a calculated value.
"""
# category: bar charts
import altair as alt
from altair.datasets import data

barley = data.barley()

# '-x' sorts the sites by the x encoding (the summed yield), descending.
sites_by_yield = alt.Y('site:N').sort('-x')

alt.Chart(barley).mark_bar().encode(x='sum(yield):Q', y=sites_by_yield)
"""
Bar Chart with Range
====================
This example shows a range bar chart where each bar displays information of a low and high value.
"""
# category: bar charts
import altair as alt
from altair.datasets import data

source = data.seattle_weather()

# Each bar spans from the lowest minimum to the highest maximum
# temperature recorded in that month.
bar = alt.Chart(source).mark_bar(cornerRadius=10, height=10).encode(
    x=alt.X('min(temp_min):Q').scale(domain=[-15, 45]).title('Temperature (°C)'),
    x2='max(temp_max):Q',
    y=alt.Y('month(date):O').title(None)
)

# Label the low end of each bar, right-aligned just left of the bar.
text_min = alt.Chart(source).mark_text(align='right', dx=-5).encode(
    x='min(temp_min):Q',
    y=alt.Y('month(date):O'),
    text='min(temp_min):Q'
)

# Label the high end of each bar, left-aligned just right of the bar.
text_max = alt.Chart(source).mark_text(align='left', dx=5).encode(
    x='max(temp_max):Q',
    y=alt.Y('month(date):O'),
    text='max(temp_max):Q'
)

(bar + text_min + text_max).properties(
    # Subtitle spelling fixed: "Seattle" (was "Seatle").
    title=alt.Title(text='Temperature variation by month', subtitle='Seattle weather, 2012-2015')
)
"""
Bar Chart Highlighting Values beyond a Threshold
------------------------------------------------
This example shows a bar chart highlighting values beyond a threshold.
"""
# category: bar charts
import pandas as pd
import altair as alt

readings = pd.DataFrame({
    "Day": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    "Value": [55, 112, 65, 38, 80, 138, 120, 103, 395, 200, 72, 51, 112, 175, 131]
})

limit = 300

all_bars = alt.Chart(readings).mark_bar(color="steelblue").encode(
    x="Day:O",
    y="Value:Q",
)

# Overlay a red bar from the limit up to the value, only for days that exceed it.
exceeds_limit = alt.datum.Value > limit
excess_bars = all_bars.mark_bar(color="#e45755").encode(
    y2=alt.Y2(datum=limit)
).transform_filter(exceeds_limit)

# Horizontal rule at the limit, plus a text label derived from the same chart.
limit_rule = alt.Chart().mark_rule().encode(y=alt.Y(datum=limit))
limit_label = limit_rule.mark_text(
    x="width",
    dx=-2,
    align="right",
    baseline="bottom",
    text="hazardous"
)

(all_bars + excess_bars + limit_rule + limit_label)
"""
Becker's Barley Wrapped Facet Plot
----------------------------------
The example demonstrates the faceted charts created by Richard Becker,
William Cleveland and others in the 1990s. Using the visualization technique
where each row is a different site (i.e. the chart is faceted by site),
they identified an anomaly in a widely used agricultural dataset,
where the "Morris" site accidentally had the years 1931 and 1932 swapped.
They named this
`"The Morris Mistake." `_.
"""
# category: case studies
import altair as alt
from altair.datasets import data

barley_url = data.barley.url

median_yield = alt.X("median(yield):Q").scale(zero=False)

# One panel per site, wrapped into two columns.
site_panels = alt.Facet("site:O", columns=2)

points = alt.Chart(barley_url).mark_point().encode(
    median_yield,
    y="variety:O",
    color="year:N",
    facet=site_panels,
)

points.properties(width=200, height=100)
"""
Calculate Residuals
-------------------
A dot plot showing each movie in the database, and the difference from the average movie rating.
The display is sorted by year to visualize everything in sequential order.
The graph is for all Movies before 2019.

Adapted from `Calculate Residuals `_.
"""
# category: advanced calculations
import altair as alt
from altair.datasets import data

rating = alt.datum["IMDB Rating"]
movies_url = data.movies.url

# ``!= None`` is deliberate: the comparison is applied to a datum expression
# and its result is handed to ``transform_filter``; ``is not None`` would be
# evaluated by Python instead.
has_rating = rating != None  # noqa: E711
released_by_2019 = alt.FieldRangePredicate(
    "Release Date", [None, 2019], timeUnit="year"
)

# Difference between a movie's rating and the dataset-wide mean.
residual = rating - alt.datum.Average_Rating

points = alt.Chart(movies_url).mark_point()
points = points.transform_filter(has_rating)
points = points.transform_filter(released_by_2019)
points = points.transform_joinaggregate(Average_Rating="mean(IMDB Rating)")
points = points.transform_calculate(Rating_Delta=residual)

chart = points.encode(
    x=alt.X("Release Date:T").title("Release Date"),
    y=alt.Y("Rating_Delta:Q").title("Rating Delta"),
    color=alt.Color("Rating_Delta:Q").title("Rating Delta").scale(domainMid=0),
)

chart
"""
Candlestick Chart
=================
A candlestick chart inspired from `Protovis `_.
This example shows the performance of the Chicago Board Options Exchange `Volatility Index `_ (VIX)
in the summer of 2009. The thick bar represents the opening and closing prices,
while the thin bar shows intraday high and low prices; if the index closed higher on a given day, the bars are colored green rather than red.
"""
# category: advanced calculations
import altair as alt
from altair.datasets import data

ohlc = data.ohlc()

# First color when open <= close, second color otherwise.
direction_color = (
    alt.when("datum.open <= datum.close")
    .then(alt.value("#06982d"))
    .otherwise(alt.value("#ae1325"))
)

date_axis = (
    alt.X('date:T')
    .axis(format='%m/%d', labelAngle=-45)
    .title('Date in 2009')
)

candles = alt.Chart(ohlc).encode(date_axis, color=direction_color)

# Thin rule spanning the intraday low and high.
low_price = alt.Y('low:Q').title('Price').scale(zero=False)
wicks = candles.mark_rule().encode(low_price, alt.Y2('high:Q'))

# Thick bar spanning the opening and closing prices.
bodies = candles.mark_bar().encode(alt.Y('open:Q'), alt.Y2('close:Q'))

wicks + bodies
"""
Comet Chart
-----------
Inspired by `Zan Armstrong's comet chart `_
this plot uses ``mark_trail`` to visualize change of grouped data over time.
A more elaborate example and explanation of creating comet charts in Altair
is shown in `this blogpost `_.
"""
# category: advanced calculations
import altair as alt
from altair.datasets import data

barley_url = data.barley.url

# Trail thickness encodes the yield.
trail_size = (
    alt.Size('yield:Q')
    .scale(range=[0, 12])
    .legend(values=[20, 60])
    .title('Barley Yield (bushels/acre)')
)

# Color encodes the 1932 - 1931 difference on a scale centered at zero.
trail_color = (
    alt.Color('delta:Q')
    .scale(domainMid=0)
    .title('Yield Delta (%)')
)

comets = alt.Chart(
    barley_url,
    title='Barley Yield comparison between 1932 and 1931'
).mark_trail().encode(
    alt.X('year:O').title(None),
    alt.Y('variety:N').title('Variety'),
    trail_size,
    trail_color,
    alt.Tooltip(['year:O', 'yield:Q']),
    alt.Column('site:N').title('Site')
)

# Pivot to one column per year so both yields sit on the same row, fold back
# to long form for plotting, then compute the difference between the years.
comets = comets.transform_pivot(
    "year",
    value="yield",
    groupby=["variety", "site"]
)
comets = comets.transform_fold(
    ["1931", "1932"],
    as_=["year", "yield"]
)
comets = comets.transform_calculate(
    calculate="datum['1932'] - datum['1931']",
    as_="delta"
)

comets.configure_legend(
    orient='bottom',
    direction='horizontal'
).configure_view(
    stroke=None
)
"""
Repeated Density Estimates
--------------------------
Density estimates for each measurement of penguins.
This is what we call a "repeated" plot, with one subplot
for each measurement type. All measurements are in millimeters,
making them directly comparable on a shared x-axis.
"""
# category: distributions
import altair as alt
from altair.datasets import data

penguins = data.penguins()

# Columns folded into long form: one row per (measurement type, value).
measurement_columns = [
    "Beak Length (mm)",
    "Beak Depth (mm)",
    "Flipper Length (mm)",
]

densities = alt.Chart(penguins).transform_fold(
    measurement_columns,
    as_=["Measurement Type", "value"],
)

# Estimate one density curve per measurement type.
densities = densities.transform_density(
    density="value",
    groupby=["Measurement Type"]
)

measurement_rows = alt.Row("Measurement Type:N").header(labelAngle=0, labelAlign="left")

densities.mark_area().encode(
    alt.X("value:Q"),
    alt.Y("density:Q"),
    measurement_rows
).properties(
    width=300,
    height=50
)
def confidence_region_2d(arr, conf_level=0.95, segments=50):
    """
    Calculate confidence interval ellipse.

    Parameters
    ----------
    arr
        numpy array with 2 columns
    conf_level
        lower tail probability
    segments
        number of points describing the ellipse.

    Returns
    -------
    numpy array of shape ``(segments, 2)`` holding the points of the ellipse,
    centered on the column means of ``arr``.
    """
    n_elements = len(arr)
    # Degrees of freedom of the chi-squared distribution in the **numerator**
    dfn = 2
    # Degrees of freedom of the chi-squared distribution in the **denominator**
    dfd = n_elements - 1
    # Percent point function at `conf_level` of an F continuous random variable
    quantile = F.ppf(conf_level, dfn=dfn, dfd=dfd)
    radius = np.sqrt(2 * quantile)
    # Evenly spaced points on the unit circle ...
    angles = np.arange(0, segments) * 2 * np.pi / segments
    circle = np.column_stack((np.cos(angles), np.sin(angles)))
    center = np.mean(arr, axis=0)
    cov_mat = np.cov(arr, rowvar=False)
    # ... scaled by the radius, shaped by the Cholesky factor of the
    # covariance matrix, and shifted to the center of the data.
    return center + radius * (circle @ np.linalg.cholesky(cov_mat).T)


def grouped_confidence_regions(
    df, col_x, col_y, col_group, conf_level=0.95, segments=50
):
    """
    Calculate one confidence ellipse per group of a dataframe.

    Parameters
    ----------
    df
        pandas DataFrame holding the observations
    col_x, col_y
        names of the two numeric columns the ellipses are computed from
    col_group
        name of the column whose distinct values define the groups
    conf_level, segments
        forwarded to ``confidence_region_2d``; the defaults match its defaults.

    Returns
    -------
    pandas DataFrame with the columns ``order`` (position of the point within
    its ellipse), ``col_x``, ``col_y`` and ``col_group``.
    """
    cols = [col_x, col_y]
    ellipses = []
    group_labels = df[col_group]
    # drop_duplicates keeps the groups in order of first appearance
    for group in group_labels.drop_duplicates():
        arr = df.loc[group_labels == group, cols].to_numpy(dtype=np.float64)
        region = confidence_region_2d(arr, conf_level=conf_level, segments=segments)
        ellipse = pd.DataFrame(region, columns=cols)
        ellipse[col_group] = group
        ellipses.append(ellipse)
    # Each ellipse has its own 0..segments-1 index; expose it as "order"
    return pd.concat(ellipses).reset_index(names="order")
"""
Distributions and Medians of Likert Scale Ratings
-------------------------------------------------
Distributions and Medians of Likert Scale Ratings. (Figure 9 from @jhoffswell and @zcliu’s ‘Interactive Repair of Tables Extracted from PDF Documents on Mobile Devices’ – http://idl.cs.washington.edu/files/2019-InteractiveTableRepair-CHI.pdf).
Adapted from `Distributions and Medians of Likert Scale Ratings `_.
"""
# category: distributions
import altair as alt
import pandas as pd

medians = pd.DataFrame(
    [
        {"name": "Identify Errors:", "median": 1.999976, "lo": "Easy", "hi": "Hard"},
        {"name": "Fix Errors:", "median": 2, "lo": "Easy", "hi": "Hard"},
        {"name": "Easier to Fix:", "median": 1.999969, "lo": "Toolbar", "hi": "Gesture"},
        {"name": "Faster to Fix:", "median": 2.500045, "lo": "Toolbar", "hi": "Gesture"},
        {"name": "Easier on Phone:", "median": 1.500022, "lo": "Toolbar", "hi": "Gesture"},
        {"name": "Easier on Tablet:", "median": 2.99998, "lo": "Toolbar", "hi": "Gesture"},
        {"name": "Device Preference:", "median": 4.500007, "lo": "Phone", "hi": "Tablet"},
    ]
)

# Names of the per-participant entries, in the order they are recorded.
entry_names = [
    "Identify Errors:",
    "Fix Errors:",
    "Easier to Fix:",
    "Faster to Fix:",
    "Easier on Phone:",
    "Easier on Tablet:",
    "Device Preference:",
    "Tablet_First",
    "Toolbar_First",
]

# One row per participant; the numbers line up with ``entry_names``.
entries_by_participant = {
    "P1": [2, 2, 3, 4, 2, 5, 5, 1, 1],
    "P2": [2, 3, 4, 5, 5, 5, 5, 1, 1],
    "P3": [2, 2, 2, 1, 2, 1, 5, 1, 0],
    "P4": [3, 3, 2, 2, 4, 1, 5, 1, 0],
    "P5": [2, 2, 4, 4, 4, 5, 5, 0, 1],
    "P6": [1, 3, 3, 4, 4, 4, 4, 0, 1],
    "P7": [2, 3, 4, 5, 3, 2, 4, 0, 0],
    "P8": [3, 1, 2, 4, 2, 5, 5, 0, 0],
    "P9": [2, 3, 2, 4, 1, 4, 4, 1, 1],
    "P10": [2, 2, 1, 1, 1, 1, 5, 1, 1],
    "P11": [2, 2, 1, 1, 1, 1, 4, 1, 0],
    "P12": [1, 3, 2, 3, 1, 3, 3, 0, 1],
    "P13": [2, 2, 1, 1, 1, 1, 5, 0, 0],
    "P14": [3, 3, 2, 2, 1, 1, 1, 1, 1],
    "P15": [4, 5, 1, 1, 1, 1, 5, 1, 0],
    "P16": [1, 3, 2, 2, 1, 4, 5, 0, 1],
    "P17": [3, 2, 2, 2, 1, 3, 2, 0, 0],
}

# Expand the table into long-form records: a "Participant ID" record first,
# followed by one record per entry, for each participant in turn.
records = []
for participant, entries in entries_by_participant.items():
    records.append({"value": participant, "name": "Participant ID", "id": participant})
    for entry_name, entry_value in zip(entry_names, entries):
        records.append({"value": entry_value, "name": entry_name, "id": participant})

values = pd.DataFrame(records)

y_axis = alt.Y("name").axis(
    title=None,
    offset=50,
    labelFontWeight="bold",
    ticks=False,
    grid=True,
    domain=False,
)

base = alt.Chart(
    medians,
).encode(y_axis)

# Keep only the records that are not bookkeeping entries.
is_rating = (
    (alt.datum.name != "Toolbar_First")
    & (alt.datum.name != "Tablet_First")
    & (alt.datum.name != "Participant ID")
)

# Circle size shows how many records share each (name, value) pair.
rating_counts = alt.Chart(values).transform_filter(is_rating)
bubbles = rating_counts.mark_circle(color="#6EB4FD").encode(
    alt.X(
        "value:Q",
    ).title(None),
    y_axis,
    alt.Size("count()").legend(offset=75, title="Number of ratings"),
    tooltip=[alt.Tooltip("count()").title("Number of ratings")],
)

median_position = (
    alt.X("median:Q")
    .axis(grid=False, values=[1, 2, 3, 4, 5], format=".0f")
    .scale(domain=[0, 6])
)
ticks = base.mark_tick(color="black").encode(median_position)

# Labels for the two ends of each scale, placed at fixed x positions.
texts_lo = base.mark_text(align="right", x=-5).encode(text="lo")
texts_hi = base.mark_text(align="left", x=255).encode(text="hi")

(bubbles + ticks + texts_lo + texts_hi).properties(
    title="Questionnaire Ratings", width=250, height=175
).configure_view(stroke=None)
Based off the vega-lite example: https://vega.github.io/vega-lite/examples/trellis_bar_histogram.html """ # category: distributions import altair as alt from altair.datasets import data source = data.cars() alt.Chart(source).mark_bar().encode( alt.X("Horsepower:Q").bin(), y="count()", row="Origin", ) ================================================ FILE: tests/examples_methods_syntax/diverging_stacked_bar_chart.py ================================================ """ Diverging Stacked Bar Chart --------------------------- This example shows a diverging stacked bar chart for sentiments towards a set of eight questions, displayed as percentages with neutral responses straddling the 0% mark. """ # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame( [ { "question": "Question 1", "type": "Strongly disagree", "value": 24, }, { "question": "Question 1", "type": "Disagree", "value": 294, }, { "question": "Question 1", "type": "Neither agree nor disagree", "value": 594, }, { "question": "Question 1", "type": "Agree", "value": 1927, }, { "question": "Question 1", "type": "Strongly agree", "value": 376, }, { "question": "Question 2", "type": "Strongly disagree", "value": 2, }, { "question": "Question 2", "type": "Disagree", "value": 2, }, { "question": "Question 2", "type": "Neither agree nor disagree", "value": 0, }, { "question": "Question 2", "type": "Agree", "value": 7, }, { "question": "Question 2", "type": "Strongly agree", "value": 11, }, { "question": "Question 3", "type": "Strongly disagree", "value": 2, }, { "question": "Question 3", "type": "Disagree", "value": 0, }, { "question": "Question 3", "type": "Neither agree nor disagree", "value": 2, }, { "question": "Question 3", "type": "Agree", "value": 4, }, { "question": "Question 3", "type": "Strongly agree", "value": 2, }, { "question": "Question 4", "type": "Strongly disagree", "value": 0, }, { "question": "Question 4", "type": "Disagree", "value": 2, }, { "question": "Question 4", 
"type": "Neither agree nor disagree", "value": 1, }, { "question": "Question 4", "type": "Agree", "value": 7, }, { "question": "Question 4", "type": "Strongly agree", "value": 6, }, { "question": "Question 5", "type": "Strongly disagree", "value": 0, }, { "question": "Question 5", "type": "Disagree", "value": 1, }, { "question": "Question 5", "type": "Neither agree nor disagree", "value": 3, }, { "question": "Question 5", "type": "Agree", "value": 16, }, { "question": "Question 5", "type": "Strongly agree", "value": 4, }, { "question": "Question 6", "type": "Strongly disagree", "value": 1, }, { "question": "Question 6", "type": "Disagree", "value": 1, }, { "question": "Question 6", "type": "Neither agree nor disagree", "value": 2, }, { "question": "Question 6", "type": "Agree", "value": 9, }, { "question": "Question 6", "type": "Strongly agree", "value": 3, }, { "question": "Question 7", "type": "Strongly disagree", "value": 0, }, { "question": "Question 7", "type": "Disagree", "value": 0, }, { "question": "Question 7", "type": "Neither agree nor disagree", "value": 1, }, { "question": "Question 7", "type": "Agree", "value": 4, }, { "question": "Question 7", "type": "Strongly agree", "value": 0, }, { "question": "Question 8", "type": "Strongly disagree", "value": 0, }, { "question": "Question 8", "type": "Disagree", "value": 0, }, { "question": "Question 8", "type": "Neither agree nor disagree", "value": 0, }, { "question": "Question 8", "type": "Agree", "value": 0, }, { "question": "Question 8", "type": "Strongly agree", "value": 2, }, ] ) # Add type_code that we can sort by source["type_code"] = source["type"].map( { "Strongly disagree": -2, "Disagree": -1, "Neither agree nor disagree": 0, "Agree": 1, "Strongly agree": 2, } ) def compute_percentages( group, ): # Set type_code as index and sort group = group.set_index("type_code").sort_index() # Compute percentage of value with question group perc = (group["value"] / group["value"].sum()) * 100 group["percentage"] 
= perc # Compute percentage end, centered on "Neither agree nor disagree" (type_code 0) # Note that we access the perc series via index which is based on 'type_code'. group["percentage_end"] = perc.cumsum() - (perc[-2] + perc[-1] + perc[0] / 2) # Compute percentage start by subtracting percent group["percentage_start"] = group["percentage_end"] - perc return group source = source.groupby("question").apply(compute_percentages).reset_index(drop=True) color_scale = alt.Scale( domain=[ "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree", ], range=["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"], ) y_axis = alt.Axis(title="Question", offset=5, ticks=False, minExtent=60, domain=False) alt.Chart(source).mark_bar().encode( x="percentage_start:Q", x2="percentage_end:Q", y=alt.Y("question:N").axis(y_axis), color=alt.Color("type:N").title("Response").scale(color_scale), ) ================================================ FILE: tests/examples_methods_syntax/donut_chart.py ================================================ """ Donut Chart ----------- This example shows how to make a Donut Chart using ``mark_arc``. This is adapted from a corresponding Vega-Lite Example: `Donut Chart `_. """ # category: circular plots import pandas as pd import altair as alt source = pd.DataFrame({ "category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8] }) alt.Chart(source).mark_arc(innerRadius=50).encode( theta="value", color="category:N", ) ================================================ FILE: tests/examples_methods_syntax/errorbars_with_ci.py ================================================ """ Error Bars with Confidence Interval ====================================== This example shows how to show error bars using confidence intervals. The confidence intervals are computed internally in vega by a non-parametric `bootstrap of the mean `_. 
""" # category: uncertainties and trends import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(extent='ci').encode( alt.X('yield').scale(zero=False), alt.Y('variety') ) points = alt.Chart(source).mark_point(filled=True, color='black').encode( x=alt.X('mean(yield)'), y=alt.Y('variety'), ) error_bars + points ================================================ FILE: tests/examples_methods_syntax/errorbars_with_std.py ================================================ """ Error Bars with Standard Deviation ---------------------------------- This example shows how to show error bars with standard deviation using crop yields data of different in the years of 1930s. """ # category: uncertainties and trends import altair as alt from altair.datasets import data source = data.barley() error_bars = alt.Chart(source).mark_errorbar(extent='stdev').encode( x=alt.X('yield').scale(zero=False), y=alt.Y('variety') ) points = alt.Chart(source).mark_point(filled=True, color='black').encode( x=alt.X('mean(yield)'), y=alt.Y('variety'), ) error_bars + points ================================================ FILE: tests/examples_methods_syntax/falkensee.py ================================================ """ Population of Falkensee from 1875 to 2014 ----------------------------------------- This example is a reproduction of the Falkensee plot found in the Vega-Lite examples. 
""" # category: case studies import altair as alt import pandas as pd source = [ {"year": "1875", "population": 1309}, {"year": "1890", "population": 1558}, {"year": "1910", "population": 4512}, {"year": "1925", "population": 8180}, {"year": "1933", "population": 15915}, {"year": "1939", "population": 24824}, {"year": "1946", "population": 28275}, {"year": "1950", "population": 29189}, {"year": "1964", "population": 29881}, {"year": "1971", "population": 26007}, {"year": "1981", "population": 24029}, {"year": "1985", "population": 23340}, {"year": "1989", "population": 22307}, {"year": "1990", "population": 22087}, {"year": "1991", "population": 22139}, {"year": "1992", "population": 22105}, {"year": "1993", "population": 22242}, {"year": "1994", "population": 22801}, {"year": "1995", "population": 24273}, {"year": "1996", "population": 25640}, {"year": "1997", "population": 27393}, {"year": "1998", "population": 29505}, {"year": "1999", "population": 32124}, {"year": "2000", "population": 33791}, {"year": "2001", "population": 35297}, {"year": "2002", "population": 36179}, {"year": "2003", "population": 36829}, {"year": "2004", "population": 37493}, {"year": "2005", "population": 38376}, {"year": "2006", "population": 39008}, {"year": "2007", "population": 39366}, {"year": "2008", "population": 39821}, {"year": "2009", "population": 40179}, {"year": "2010", "population": 40511}, {"year": "2011", "population": 40465}, {"year": "2012", "population": 40905}, {"year": "2013", "population": 41258}, {"year": "2014", "population": 41777}, ] source2 = [ {"start": "1933", "end": "1945", "event": "Nazi Rule"}, {"start": "1948", "end": "1989", "event": "GDR (East Germany)"}, ] source_df = pd.DataFrame(source) source2_df = pd.DataFrame(source2) line = alt.Chart(source_df).mark_line(color="#333").encode( alt.X("year:T").axis(format="%Y").title("Year"), alt.Y("population").title("Population"), ) point = line.mark_point(color="#333") rect = 
alt.Chart(source2_df).mark_rect().encode( x="start:T", x2="end:T", color=alt.Color("event:N").title("Event") ) (rect + line + point).properties( title="Population of Falkensee from 1875 to 2014", width=500, height=300 ) ================================================ FILE: tests/examples_methods_syntax/gapminder_bubble_plot.py ================================================ """ Gapminder Bubble Plot ===================== This example shows how to make a bubble plot showing the correlation between health and income for 187 countries in the world (modified from an example in Lisa Charlotte Rost's blog post `'One Chart, Twelve Charting Libraries' `_). """ # category: case studies import altair as alt from altair.datasets import data source = data.gapminder_health_income.url alt.Chart(source).mark_circle().encode( alt.X('income:Q').scale(type='log'), alt.Y('health:Q').scale(zero=False), size='population:Q' ) ================================================ FILE: tests/examples_methods_syntax/groupby-map.py ================================================ """ Grouped Points with Proportional Symbols Map ============================================ This is a layered geographic visualization that groups points by state. 
""" # category: maps import altair as alt from altair.datasets import data airports = data.airports.url states = alt.topo_feature(data.us_10m.url, feature='states') # US states background background = alt.Chart(states).mark_geoshape( fill='lightgray', stroke='white' ).properties( width=500, height=300 ).project('albersUsa') # Airports grouped by state points = alt.Chart(airports, title='Number of airports in US').transform_aggregate( latitude='mean(latitude)', longitude='mean(longitude)', count='count()', groupby=['state'] ).mark_circle().encode( longitude='longitude:Q', latitude='latitude:Q', size=alt.Size('count:Q').title('Number of Airports'), color=alt.value('steelblue'), tooltip=['state:N','count:Q'] ) background + points ================================================ FILE: tests/examples_methods_syntax/grouped_bar_chart2.py ================================================ """ Grouped Bar Chart with xOffset ------------------------------ Like :ref:`gallery_grouped_bar_chart`, this example shows a grouped bar chart. Whereas :ref:`gallery_grouped_bar_chart` used the ``column`` encoding channel, this example uses the ``xOffset`` encoding channel. This is adapted from a corresponding Vega-Lite Example: `Grouped Bar Chart `_. 
""" # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame({ "Category":list("AAABBBCCC"), "Group":list("xyzxyzxyz"), "Value":[0.1, 0.6, 0.9, 0.7, 0.2, 1.1, 0.6, 0.1, 0.2] }) alt.Chart(source).mark_bar().encode( x="Category:N", y="Value:Q", xOffset="Group:N", color="Group:N" ) ================================================ FILE: tests/examples_methods_syntax/grouped_bar_chart_overlapping_bars.py ================================================ """ Grouped Bar Chart with xOffset and overlapping bars --------------------------------------------------- Like :ref:`gallery_grouped_bar_chart2`, this example shows a grouped bar chart using the ``xOffset`` encoding channel, but in this example the bars are partly overlapping within each group. """ # category: bar charts import altair as alt import pandas as pd source = pd.DataFrame( { "category": list("AABBCC"), "group": list("xyxyxy"), "value": [0.1, 0.6, 0.7, 0.2, 0.6, 0.1], } ) base = alt.Chart(source, width=alt.Step(12)).encode( x="category:N", y="value:Q", xOffset=alt.XOffset("group:N").scale(paddingOuter=0.5), ) alt.layer( base.mark_bar(size=20, stroke="white", fillOpacity=0.9).encode(fill="group:N"), base.mark_text(dy=-5).encode(text="value:Q"), ) ================================================ FILE: tests/examples_methods_syntax/grouped_bar_chart_with_error_bars.py ================================================ """ Grouped Bar Chart with Error Bars --------------------------------- This example shows a grouped bar chart with error bars. 
""" # category: bar charts import altair as alt from altair.datasets import data source = data.barley() bars = alt.Chart().mark_bar().encode( x='year:O', y=alt.Y('mean(yield):Q').title('Mean Yield'), color='year:N', ) error_bars = alt.Chart().mark_errorbar(extent='ci').encode( x='year:O', y='yield:Q' ) alt.layer(bars, error_bars, data=source).facet( column='site:N' ) ================================================ FILE: tests/examples_methods_syntax/heat_lane.py ================================================ """ Heat Lane Chart --------------- This example shows how to make an alternative form of a histogram `designed at Google `_ with the goal of increasing accessibility. """ # category: distributions import altair as alt from altair.datasets import data source = data.cars.url chart = alt.Chart(source, title="Car horsepower", height=100, width=300).encode( alt.X("bin_Horsepower_start:Q") .title("Horsepower") .axis(grid=False), alt.X2("bin_Horsepower_end:Q"), alt.Y("y:O").axis(None), alt.Y2("y2"), ).transform_bin( ["bin_Horsepower_start", "bin_Horsepower_end"], field='Horsepower' ).transform_aggregate( count='count()', groupby=["bin_Horsepower_start", "bin_Horsepower_end"] ).transform_bin( ["bin_count_start", "bin_count_end"], field='count' ).transform_calculate( y="datum.bin_count_end/2", y2="-datum.bin_count_end/2", ).transform_joinaggregate( max_bin_count_end="max(bin_count_end)", ) layer1 = chart.mark_bar(xOffset=1, x2Offset=-1, cornerRadius=3).encode( alt.Color("max_bin_count_end:O") .title("Number of models") .scale(scheme="lighttealblue") ) layer2 = chart.mark_bar(xOffset=1, x2Offset=-1, yOffset=-3, y2Offset=3).encode( alt.Color("bin_count_end:O").title("Number of models") ) layer1 + layer2 ================================================ FILE: tests/examples_methods_syntax/hexbins.py ================================================ """ Hexbin Chart ------------ This example shows a hexbin chart. 
""" # category: tables import altair as alt from altair.datasets import data source = data.seattle_weather() # Size of the hexbins size = 15 # Count of distinct x features xFeaturesCount = 12 # Count of distinct y features yFeaturesCount = 7 # Name of the x field xField = 'date' # Name of the y field yField = 'date' # the shape of a hexagon hexagon = "M0,-2.3094010768L2,-1.1547005384 2,1.1547005384 0,2.3094010768 -2,1.1547005384 -2,-1.1547005384Z" alt.Chart(source).mark_point(size=size**2, shape=hexagon).encode( alt.X('xFeaturePos:Q') .title('Month') .axis(grid=False, tickOpacity=0, domainOpacity=0), alt.Y('day(' + yField + '):O') .title('Weekday') .axis(labelPadding=20, tickOpacity=0, domainOpacity=0), stroke=alt.value('black'), strokeWidth=alt.value(0.2), fill=alt.Fill('mean(temp_max):Q').scale(scheme='darkblue'), tooltip=['month(' + xField + '):O', 'day(' + yField + '):O', 'mean(temp_max):Q'] ).transform_calculate( # This field is required for the hexagonal X-Offset xFeaturePos='(day(datum.' + yField + ') % 2) / 2 + month(datum.' + xField + ')' ).properties( # Exact scaling factors to make the hexbins fit width=size * xFeaturesCount * 2, height=size * yFeaturesCount * 1.7320508076, # 1.7320508076 is approx. sin(60°)*2 ).configure_view( strokeWidth=0 ) ================================================ FILE: tests/examples_methods_syntax/histogram_gradient_color.py ================================================ """ Histogram with Gradient Color ----------------------------- This example shows how to make a histogram with gradient color. The low-high IMDB rating is represented with the color scheme `pinkyellowgreen`. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_bar().encode( alt.X("IMDB Rating:Q").bin(maxbins=20).scale(domain=[1, 10]), alt.Y('count()'), alt.Color("IMDB Rating:Q").bin(maxbins=20).scale(scheme='pinkyellowgreen') ) ================================================ FILE: tests/examples_methods_syntax/histogram_heatmap.py ================================================ """ 2D Histogram Heatmap -------------------- This example shows how to make a heatmap from binned quantitative data. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_rect().encode( alt.X('IMDB Rating:Q').bin(maxbins=60), alt.Y('Rotten Tomatoes Rating:Q').bin(maxbins=40), alt.Color('count():Q').scale(scheme='greenblue') ) ================================================ FILE: tests/examples_methods_syntax/histogram_responsive.py ================================================ """ Histogram with Responsive Bins ------------------------------ This shows an example of a histogram with bins that are responsive to a selection domain. Click and drag on the bottom panel to see the bins change on the top panel. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.flights_5k.url brush = alt.selection_interval(encodings=['x']) base = alt.Chart(source).transform_calculate( time="hours(datum.date) + minutes(datum.date) / 60" ).mark_bar().encode( y='count():Q' ).properties( width=600, height=100 ) alt.vconcat( base.encode( alt.X('time:Q') .bin(maxbins=30, extent=brush) .scale(domain=brush) ), base.encode( alt.X('time:Q').bin(maxbins=30), ).add_params(brush) ) ================================================ FILE: tests/examples_methods_syntax/histogram_scatterplot.py ================================================ """ 2D Histogram Scatter Plot ------------------------- This example shows how to make a 2d histogram scatter plot. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url alt.Chart(source).mark_circle().encode( alt.X('IMDB Rating:Q').bin(), alt.Y('Rotten Tomatoes Rating:Q').bin(), size='count()' ) ================================================ FILE: tests/examples_methods_syntax/histogram_with_a_global_mean_overlay.py ================================================ """ Histogram with a Global Mean Overlay ------------------------------------ This example shows a histogram with a global mean overlay. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url base = alt.Chart(source) bar = base.mark_bar().encode( alt.X('IMDB Rating:Q').bin().axis(None), y='count()' ) rule = base.mark_rule(color='red').encode( x='mean(IMDB Rating):Q', size=alt.value(5) ) bar + rule ================================================ FILE: tests/examples_methods_syntax/horizon_graph.py ================================================ """ Horizon Graph ------------- This example shows how to make a Horizon Graph with 2 layers. (See https://idl.cs.washington.edu/papers/horizon/ for more details on Horizon Graphs.) 
""" # category: area charts import altair as alt import pandas as pd source = pd.DataFrame([ {"x": 1, "y": 28}, {"x": 2, "y": 55}, {"x": 3, "y": 43}, {"x": 4, "y": 91}, {"x": 5, "y": 81}, {"x": 6, "y": 53}, {"x": 7, "y": 19}, {"x": 8, "y": 87}, {"x": 9, "y": 52}, {"x": 10, "y": 48}, {"x": 11, "y": 24}, {"x": 12, "y": 49}, {"x": 13, "y": 87}, {"x": 14, "y": 66}, {"x": 15, "y": 17}, {"x": 16, "y": 27}, {"x": 17, "y": 68}, {"x": 18, "y": 16}, {"x": 19, "y": 49}, {"x": 20, "y": 15} ]) area1 = alt.Chart(source).mark_area( clip=True, interpolate='monotone', opacity=0.6 ).encode( alt.X('x').scale(zero=False, nice=False), alt.Y('y').scale(domain=[0, 50]).title('y'), ).properties( width=500, height=75 ) area2 = area1.encode( alt.Y('ny:Q').scale(domain=[0, 50]) ).transform_calculate( "ny", alt.datum.y - 50 ) area1 + area2 ================================================ FILE: tests/examples_methods_syntax/interactive_aggregation.py ================================================ """ Interactive Chart with Aggregation ================================== This example shows an interactive chart where the range binder controls a threshold as rule where the datapoints on the left-side are aggregated and on the right-side are drawn as is. The ability to slide back and fourth may help you understand how the visualization represents the aggregation. Adapted from an example by @dwootton. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.movies.url slider = alt.binding_range(min=0, max=10, step=0.1) threshold = alt.param(name="threshold", value=5, bind=slider) alt.layer( alt.Chart(source).mark_circle().encode( x=alt.X("IMDB Rating:Q").title("IMDB Rating"), y=alt.Y("Rotten Tomatoes Rating:Q").title("Rotten Tomatoes Rating") ).transform_filter( alt.datum["IMDB Rating"] >= threshold ), alt.Chart(source).mark_circle().encode( x=alt.X("IMDB Rating:Q").bin(maxbins=10), y=alt.Y("Rotten Tomatoes Rating:Q").bin(maxbins=10), size=alt.Size("count():Q").scale(domain=[0,160]) ).transform_filter( alt.datum["IMDB Rating"] < threshold ), alt.Chart().mark_rule(color="gray").encode( strokeWidth=alt.StrokeWidth(value=6), x=alt.X(datum=alt.expr(threshold.name), type="quantitative") ) ).add_params(threshold) ================================================ FILE: tests/examples_methods_syntax/interactive_bar_select_highlight.py ================================================ """ Bar Chart with Highlighting on Hover and Selection on Click ----------------------------------------------------------- This example shows a bar chart with highlighting on hover and selecting on click. (Inspired by Tableau's interaction style.) 
Based on https://vega.github.io/vega-lite/examples/interactive_bar_select_highlight.html """ # category: interactive charts import altair as alt source = { "values": [ {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43}, {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53}, {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52}, ] } select = alt.selection_point(name="select", on="click") highlight = alt.selection_point(name="highlight", on="pointerover", empty=False) stroke_width = ( alt.when(select).then(alt.value(2, empty=False)) .when(highlight).then(alt.value(1)) .otherwise(alt.value(0)) ) alt.Chart(source, height=200).mark_bar( fill="#4C78A8", stroke="black", cursor="pointer" ).encode( x="a:O", y="b:Q", fillOpacity=alt.when(select).then(alt.value(1)).otherwise(alt.value(0.3)), strokeWidth=stroke_width, ).configure_scale(bandPaddingInner=0.2).add_params(select, highlight) ================================================ FILE: tests/examples_methods_syntax/interactive_column_selection.py ================================================ """ Interactive Selection of Columns ================================ This example shows how columns can be selected interactively by accessing the values from selector widgets, and then compute the difference of the selected columns. It also illustrates how to use `indexof` to filter columns based on active selection values. 
""" # category: interactive charts import pandas as pd import numpy as np import altair as alt # Create timeseries data rng = np.random.default_rng(905) ex_ts = pd.DataFrame( rng.random((10, 4)), columns=['a', 'b', 'c', 'd'], ).assign( date=pd.date_range( start=pd.to_datetime('2022-02-22')-pd.Timedelta(9, unit='D'), end=pd.to_datetime('2022-02-22')).strftime('%Y-%m-%d'), ) # Create heatmap with selection select_x = alt.selection_point(fields=['level_0'], name='select_x', value='b') select_y = alt.selection_point(fields=['level_1'], name='select_y', value='d') heatmap = alt.Chart( ex_ts.drop(columns='date').corr().stack().reset_index().rename(columns={0: 'correlation'}), title='Click a tile to compare timeseries', height=250, width=250, ).mark_rect().encode( alt.X('level_0').title(None), alt.Y('level_1').title(None), alt.Color('correlation').scale(domain=[-1, 1], scheme='blueorange'), opacity=alt.when(select_x, select_y).then(alt.value(1)).otherwise(alt.value(0.4)), ).add_params( select_x, select_y ) # Create chart with individual lines/timeseries base = alt.Chart( ex_ts.melt( id_vars='date', var_name='category', value_name='value', ), height=100, width=300, title='Individual timeseries', ) lines = base.transform_filter( # If the category is not in the selected values, the returned index is -1 'indexof(datum.category, select_x.level_0) !== -1' '| indexof(datum.category, select_y.level_1) !== -1' ).mark_line().encode( alt.X('date:T').axis(labels=False).title(None), alt.Y('value').scale(domain=(0, 1)), alt.Color('category').legend(orient='top', offset=-20).title(None) ) # Create chart with difference between lines/timeseries dynamic_title = alt.Title(alt.expr(f'"Difference " + {select_x.name}.level_0 + " - " + {select_y.name}.level_1')) # We pivot transform to get each category as a column lines_diff = base.transform_pivot( 'category', 'value', groupby=['date'] # In the calculate transform we use the values from the selection to subset the columns to subtract 
).transform_calculate( difference = f'datum[{select_x.name}.level_0] - datum[{select_y.name}.level_1]' ).mark_line(color='grey').encode( alt.X('date:T').axis(format='%Y-%m-%d').title(None), alt.Y('difference:Q').scale(domain=(-1, 1)), ).properties( title=dynamic_title ) # Layout the charts (lines & lines_diff) | heatmap ================================================ FILE: tests/examples_methods_syntax/interactive_cross_highlight.py ================================================ """ Interactive Chart with Cross-Highlight ====================================== This example shows an interactive chart where selections in one portion of the chart affect what is shown in other panels. Click on the bar chart to see a detail of the distribution in the upper panel. """ # category: interactive charts import altair as alt from altair.datasets import data source = data.movies.url pts = alt.selection_point(encodings=['x']) rect = alt.Chart(data.movies.url).mark_rect().encode( alt.X('IMDB Rating:Q').bin(), alt.Y('Rotten Tomatoes Rating:Q').bin(), alt.Color('count()').scale(scheme='greenblue').title('Total Records') ) circ = rect.mark_point().encode( alt.ColorValue('grey'), alt.Size('count()').title('Records in Selection') ).transform_filter( pts ) bar = alt.Chart(source, width=550, height=200).mark_bar().encode( x='Major Genre:N', y='count()', color=alt.when(pts).then(alt.ColorValue("steelblue")).otherwise(alt.ColorValue("grey")) ).add_params(pts) alt.vconcat( rect + circ, bar ).resolve_legend( color="independent", size="independent" ) ================================================ FILE: tests/examples_methods_syntax/interactive_layered_crossfilter.py ================================================ """ Interactive Crossfilter ======================= This example shows a multi-panel view of the same data, where you can interactively select a portion of the data in any of the panels to highlight that portion in any of the other panels. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = alt.UrlData( data.flights_2k.url, format={'parse': {'date': 'date'}} ) brush = alt.selection_interval(encodings=['x']) # Define the base chart, with the common parts of the # background and highlights base = alt.Chart(width=160, height=130).mark_bar().encode( x=alt.X(alt.repeat('column')).bin(maxbins=20), y='count()' ) # gray background with selection background = base.encode( color=alt.value('#ddd') ).add_params(brush) # blue highlights on the transformed data highlight = base.transform_filter(brush) # layer the two charts & repeat alt.layer( background, highlight, data=source ).transform_calculate( "time", "hours(datum.date)" ).repeat(column=["distance", "delay", "time"]) ================================================ FILE: tests/examples_methods_syntax/interactive_legend.py ================================================ """ Interactive Legend ------------------ The following shows how to create a chart with an interactive legend, by binding the selection to ``"legend"``. Such a binding only works with ``selection_point`` when projected over a single field or encoding. 
""" # category: interactive charts import altair as alt from altair.datasets import data source = data.unemployment_across_industries.url selection = alt.selection_point(fields=['series'], bind='legend') alt.Chart(source).mark_area().encode( alt.X('yearmonth(date):T').axis(domain=False, format='%Y', tickSize=0), alt.Y('sum(count):Q').stack('center').axis(None), alt.Color('series:N').scale(scheme='category20b'), opacity=alt.when(selection).then(alt.value(1)).otherwise(alt.value(0.2)) ).add_params( selection ) ================================================ FILE: tests/examples_methods_syntax/interval_selection.py ================================================ """ Interval Selection with Initial Date Range ========================================== This is an example of creating a stacked chart for which the domain of the top chart can be selected by interacting with the bottom chart. The initial selection range is set using Python's native datetime objects. """ # category: interactive charts import altair as alt from altair.datasets import data import datetime as dt source = data.sp500.url date_range = (dt.date(2007, 6, 30), dt.date(2009, 6, 30)) brush = alt.selection_interval(encodings=['x'], value={'x': date_range}) base = alt.Chart(source, width=600, height=200).mark_area().encode( x = 'date:T', y = 'price:Q' ) upper = base.encode( alt.X('date:T').scale(domain=brush) ) lower = base.properties( height=60 ).add_params(brush) upper & lower ================================================ FILE: tests/examples_methods_syntax/interval_selection_map_quakes.py ================================================ """ Interval Selection on a Map =========================== This is an example of a binned bar chart on the right where the filtered overlay is adjusted by interacting with the map on the left. 
""" # category: interactive charts import altair as alt from altair.datasets import data # load data gdf_quakies = data.earthquakes() gdf_world = data.world_110m(layer="countries") # definition for interactive brush brush = alt.selection_interval( encodings=["longitude"], empty=False, value={"longitude": [-50, -110]} ) # world disk sphere = alt.Chart(alt.sphere()).mark_geoshape( fill="transparent", stroke="lightgray", strokeWidth=1 ) # countries as shapes world = alt.Chart(gdf_world).mark_geoshape( fill="lightgray", stroke="white", strokeWidth=0.1 ) # earthquakes as dots on map quakes = alt.Chart(gdf_quakies).transform_calculate( lon="datum.geometry.coordinates[0]", lat="datum.geometry.coordinates[1]", ).mark_circle(opacity=0.35, tooltip=True).encode( longitude="lon:Q", latitude="lat:Q", color=alt.when(brush).then(alt.value("goldenrod")).otherwise(alt.value("steelblue")), size=alt.Size("mag:Q").scale(type="pow", range=[1, 1000], domain=[0, 7], exponent=4), ).add_params(brush) # combine layers for the map left_map = alt.layer(sphere, world, quakes).project(type="mercator") # histogram of binned earthquakes bars = alt.Chart(gdf_quakies).mark_bar().encode( x=alt.X("mag:Q").bin(extent=[0,7]), y="count(mag):Q", color=alt.value("steelblue") ) # filtered earthquakes bars_overlay = bars.encode(color=alt.value("goldenrod")).transform_filter(brush) # combine layers for histogram right_bars = alt.layer(bars, bars_overlay) # vertical concatenate map and bars left_map | right_bars ================================================ FILE: tests/examples_methods_syntax/iowa_electricity.py ================================================ """ Iowa's Renewable Energy Boom ---------------------------- This example is a fully developed stacked chart using the sample dataset of Iowa's electricity sources. 
""" # category: case studies import altair as alt from altair.datasets import data source = data.iowa_electricity() alt.Chart( source, title=alt.Title( "Iowa's green energy boom", subtitle="A growing share of the state's energy has come from renewable sources" ) ).mark_area().encode( alt.X("year:T").title("Year"), alt.Y("net_generation:Q") .title("Share of net generation") .stack("normalize") .axis(format=".0%"), alt.Color("source:N").title("Electricity source") ) ================================================ FILE: tests/examples_methods_syntax/isotype.py ================================================ ''' Isotype Visualization ===================== Isotype Visualization shows the distribution of animals across UK and US. Inspired by `Only An Ocean Between, 1943 `_. Population Live Stock, p.13. This is adapted from Vega-Lite example https://vega.github.io/editor/#/examples/vega-lite/isotype_bar_chart ''' # category: advanced calculations import altair as alt import pandas as pd source = pd.DataFrame([ {'country': 'Great Britain', 'animal': 'cattle'}, {'country': 'Great Britain', 'animal': 'cattle'}, {'country': 'Great Britain', 'animal': 'cattle'}, {'country': 'Great Britain', 'animal': 'pigs'}, {'country': 'Great Britain', 'animal': 'pigs'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 
'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'} ]) domains = ['person', 'cattle', 'pigs', 'sheep'] shape_scale = alt.Scale( domain=domains, range=[ 'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z', 'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 
-0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z', 'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 -0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z', 'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 
-0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z' ] ) color_scale = alt.Scale( domain=domains, range=['rgb(162,160,152)', 'rgb(194,81,64)', 'rgb(93,93,93)', 'rgb(91,131,149)'] ) alt.Chart(source).mark_point(filled=True, opacity=1, size=100).encode( alt.X('x:O').axis(None), alt.Y('animal:O').axis(None), alt.Row('country:N').title(None), alt.Shape('animal:N').legend(None).scale(shape_scale), alt.Color('animal:N').legend(None).scale(color_scale), ).transform_window( x='rank()', groupby=['country', 'animal'] ).properties( width=550, height=140 ) ================================================ FILE: tests/examples_methods_syntax/isotype_emoji.py ================================================ ''' Isotype Visualization with Emoji ================================ Isotype Visualization shows the distribution of animals across UK and US, using unicode emoji marks rather than custom SVG paths (see https://altair-viz.github.io/gallery/isotype.html). This is adapted from Vega-Lite example https://vega.github.io/vega-lite/examples/isotype_bar_chart_emoji.html. 
''' # category:advanced calculations import altair as alt import pandas as pd source = pd.DataFrame([ {'country': 'Great Britain', 'animal': 'cattle'}, {'country': 'Great Britain', 'animal': 'cattle'}, {'country': 'Great Britain', 'animal': 'cattle'}, {'country': 'Great Britain', 'animal': 'pigs'}, {'country': 'Great Britain', 'animal': 'pigs'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'Great Britain', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'cattle'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'pigs'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'}, {'country': 'United States', 'animal': 'sheep'} ]) alt.Chart(source).mark_text(size=45, baseline='middle').encode( 
alt.X('x:O').axis(None), alt.Y('animal:O').axis(None), alt.Row('country:N').title(''), alt.Text('emoji:N') ).transform_calculate( emoji="{'cattle': '🐄', 'pigs': '🐖', 'sheep': '🐏'}[datum.animal]" ).transform_window( x='rank()', groupby=['country', 'animal'] ).properties( width=550, height=140 ) ================================================ FILE: tests/examples_methods_syntax/isotype_grid.py ================================================ """ Isotype Grid ------------ This example is a grid of isotype figures. """ # category: advanced calculations import altair as alt import pandas as pd data = pd.DataFrame([dict(id=i) for i in range(1, 101)]) person = ( "M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 " "-0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 " "0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 " "0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 " "0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 " "-0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 " "-0.6 -0.4 -0.6z" ) alt.Chart(data).transform_calculate( row="ceil(datum.id/10)" ).transform_calculate( col="datum.id - datum.row*10" ).mark_point( filled=True, size=50 ).encode( alt.X("col:O").axis(None), alt.Y("row:O").axis(None), alt.ShapeValue(person) ).properties( width=400, height=400 ).configure_view( strokeWidth=0 ) ================================================ FILE: tests/examples_methods_syntax/lasagna_plot.py ================================================ """ Lasagna Plot (Dense Time-Series Heatmap) ---------------------------------------- """ # category: tables import altair as alt from altair.datasets import data source = data.stocks() color_condition = ( alt.when(alt.expr.month("datum.value") == 1, alt.expr.date("datum.value") == 1) .then(alt.value("black")) .otherwise(alt.value(None)) ) alt.Chart(source, width=300, height=100).transform_filter( alt.datum.symbol != "GOOG" ).mark_rect().encode( alt.X("yearmonthdate(date):O") .title("Time") .axis( format="%Y", labelAngle=0, 
labelOverlap=False, labelColor=color_condition, tickColor=color_condition, ), alt.Y("symbol:N").title(None), alt.Color("sum(price)").title("Price") ) ================================================ FILE: tests/examples_methods_syntax/layered_area_chart.py ================================================ """ Layered Area Chart ------------------ This example shows a layered area chart. """ # category: area charts import altair as alt from altair.datasets import data source = data.iowa_electricity() alt.Chart(source).mark_area(opacity=0.3).encode( x="year:T", y=alt.Y("net_generation:Q").stack(None), color="source:N" ) ================================================ FILE: tests/examples_methods_syntax/layered_bar_chart.py ================================================ """ Layered Bar Chart ----------------- This example shows a segmented bar chart that is layered rather than stacked. """ # category: bar charts import altair as alt from altair.datasets import data source = data.iowa_electricity() alt.Chart(source).mark_bar(opacity=0.7).encode( x='year:O', y=alt.Y('net_generation:Q').stack(None), color="source", ) ================================================ FILE: tests/examples_methods_syntax/layered_chart_with_dual_axis.py ================================================ """ Layered chart with Dual-Axis ---------------------------- This example shows how to create a second independent y axis. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.seattle_weather() base = alt.Chart(source).encode( alt.X('month(date):T').title(None) ) area = base.mark_area(opacity=0.3, color='#57A44C').encode( alt.Y('average(temp_max)').axis(title='Avg. 
Temperature (°C)', titleColor='#57A44C'), alt.Y2('average(temp_min)') ) line = base.mark_line(stroke='#5276A7', interpolate='monotone').encode( alt.Y('average(precipitation)').axis(title='Precipitation (inches)', titleColor='#5276A7') ) alt.layer(area, line).resolve_scale( y='independent' ) ================================================ FILE: tests/examples_methods_syntax/layered_heatmap_text.py ================================================ """ Text over a Heatmap ------------------- An example of a layered chart of text over a heatmap using the cars dataset. """ # category: tables import altair as alt from altair.datasets import data source = data.cars() # Configure common options. We specify the aggregation # as a transform here so we can reuse it in both layers. base = alt.Chart(source).transform_aggregate( mean_horsepower='mean(Horsepower)', groupby=['Origin', 'Cylinders'] ).encode( alt.X('Cylinders:O'), alt.Y('Origin:O'), ) # Configure heatmap heatmap = base.mark_rect().encode( alt.Color('mean_horsepower:Q') .scale(scheme='viridis') .title("Mean of Horsepower") ) color = ( alt.when(alt.datum.mean_horsepower > 150) .then(alt.value("black")) .otherwise(alt.value("white")) ) # Configure text text = base.mark_text(baseline='middle').encode( alt.Text('mean_horsepower:Q', format=".0f"), color=color ) # Draw the chart heatmap + text ================================================ FILE: tests/examples_methods_syntax/layered_histogram.py ================================================ """ Layered Histogram ================= This example shows how to use opacity to make a layered histogram in Altair. 
""" # category: distributions import pandas as pd import altair as alt import numpy as np np.random.seed(42) # Generating Data source = pd.DataFrame({ 'Trial A': np.random.normal(0, 0.8, 1000), 'Trial B': np.random.normal(-2, 1, 1000), 'Trial C': np.random.normal(3, 2, 1000) }) alt.Chart(source).transform_fold( ['Trial A', 'Trial B', 'Trial C'], as_=['Experiment', 'Measurement'] ).mark_bar( opacity=0.3, binSpacing=0 ).encode( alt.X('Measurement:Q').bin(maxbins=100), alt.Y('count()').stack(None), alt.Color('Experiment:N') ) ================================================ FILE: tests/examples_methods_syntax/line_chart_with_color_datum.py ================================================ """ Line Chart with Datum for Color ------------------------------- An example of using ``repeat`` inside ``datum`` to color a multi-series line chart. This is adapted from this corresponding Vega-Lite Example: `Repeat and Layer to Show Different Movie Measures `_. """ # category: line charts import altair as alt from altair.datasets import data source = data.movies() alt.Chart(source).mark_line().encode( alt.X("IMDB Rating").bin(True), alt.Y(alt.repeat("layer")) .aggregate("mean") .title("Mean of US and Worldwide Gross"), color=alt.datum(alt.repeat("layer")), ).repeat( layer=["US Gross", "Worldwide Gross"] ) ================================================ FILE: tests/examples_methods_syntax/line_chart_with_cumsum.py ================================================ """ Line Chart with Cumulative Sum ------------------------------ This chart creates a simple line chart from the cumulative sum of a fields. """ # category: line charts import altair as alt from altair.datasets import data source = data.wheat() alt.Chart(source, width=600).mark_line().transform_window( # Sort the data chronologically sort=[{'field': 'year'}], # Include all previous records before the current record and none after # (This is the default value so you could skip it and it would still work.) 
frame=[None, 0], # What to add up as you go cumulative_wheat='sum(wheat)' ).encode( x='year:O', # Plot the calculated field created by the transformation y='cumulative_wheat:Q' ) ================================================ FILE: tests/examples_methods_syntax/line_chart_with_cumsum_faceted.py ================================================ """ Faceted Line Chart with Cumulative Sum -------------------------------------- This chart creates one facet per natural disaster and shows the cumulative number of deaths for that category. Note the use of different predicates to filter based on both a list and a range. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.disasters() columns_sorted = ['Drought', 'Epidemic', 'Earthquake', 'Flood'] alt.Chart(source).transform_filter( alt.FieldOneOfPredicate(field='Entity', oneOf=columns_sorted), alt.FieldRangePredicate(field='Year', range=[1900, 2000]) ).transform_window( cumulative_deaths='sum(Deaths)', groupby=['Entity'] # Calculate cumulative sum of Deaths by Entity ).mark_line().encode( alt.X('Year:Q', title=None).axis(format='d'), alt.Y('cumulative_deaths:Q', title=None), alt.Color('Entity:N', legend=None) ).properties( width=300, height=150 ).facet( facet=alt.Facet( 'Entity:N', title=None, sort=columns_sorted, header=alt.Header(labelAnchor='start', labelFontStyle='italic') ), title=alt.Title( text=['Cumulative casualties by type of disaster', 'in the 20th century'], anchor='middle' ), columns=2 ).resolve_axis(y='independent', x='independent') ================================================ FILE: tests/examples_methods_syntax/line_chart_with_custom_legend.py ================================================ """ Line Chart with Custom Legend ----------------------------- This example uses the argmax aggregation function in order to create a custom legend for a line chart. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.stocks() base = alt.Chart(source).encode( alt.Color("symbol").legend(None) ).transform_filter( "datum.symbol !== 'IBM'" ).properties( width=500 ) line = base.mark_line().encode(x="date", y="price") last_price = base.mark_circle().encode( alt.X("last_date['date']:T"), alt.Y("last_date['price']:Q") ).transform_aggregate( last_date="argmax(date)", groupby=["symbol"] ) company_name = last_price.mark_text(align="left", dx=4).encode(text="symbol") chart = (line + last_price + company_name).encode( x=alt.X().title("date"), y=alt.Y().title("price") ) chart ================================================ FILE: tests/examples_methods_syntax/line_custom_order.py ================================================ """ Line Chart with Custom Order ---------------------------- By default, the line's path (order of points in the line) is determined by data values on the temporal/ordinal field. However, a field can be mapped to the order channel for a custom path. For example, to show a pattern of data change over time between gasoline price and average miles driven per capita we use order channel to sort the points in the line by time field (year). The earliest year (1956) is one endpoint and the latest year (2010) is the other endpoint. This is based on Hannah Fairfield's article 'Driving Shifts Into Reverse'. See https://archive.nytimes.com/www.nytimes.com/imagepages/2010/05/02/business/02metrics.html. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.driving() alt.Chart(source).mark_line(point=True).encode( x=alt.X("miles").scale(zero=False), y=alt.Y("gas").scale(zero=False), order="year", tooltip=["miles", "gas", "year"], ) ================================================ FILE: tests/examples_methods_syntax/line_percent.py ================================================ """ Line Chart with Percent axis ---------------------------- This example shows how to format the tick labels of the y-axis of a chart as percentages. """ # category: line charts import altair as alt from altair.datasets import data source = data.jobs.url alt.Chart(source).mark_line().encode( alt.X('year:O'), alt.Y('perc:Q').axis(format='%'), alt.Color('sex:N') ).transform_filter( alt.datum.job == 'Welder' ) ================================================ FILE: tests/examples_methods_syntax/line_with_ci.py ================================================ """ Line Chart with Confidence Interval Band ---------------------------------------- How to make a line chart with a bootstrapped 95% confidence interval band. 
""" # category: uncertainties and trends import altair as alt from altair.datasets import data source = data.cars() line = alt.Chart(source).mark_line().encode( x='Year', y='mean(Miles_per_Gallon)' ) band = alt.Chart(source).mark_errorband(extent='ci').encode( x='Year', y=alt.Y('Miles_per_Gallon').title('Miles/Gallon'), ) band + line ================================================ FILE: tests/examples_methods_syntax/line_with_last_value_labeled.py ================================================ """ Line Chart with Last Value Labeled ---------------------------------- This chart shows a line chart with a label annotating the final value """ # category: line charts import altair as alt from altair.datasets import data # Import example data source = data.stocks() # Create a common chart object chart = alt.Chart(source).transform_filter( alt.datum.symbol != "IBM" # A reducation of the dataset to clarify our example. Not required. ).encode( alt.Color("symbol").legend(None) ) # Draw the line line = chart.mark_line().encode( x="date:T", y="price:Q" ) # Use the `argmax` aggregate to limit the dataset to the final value label = chart.encode( x='max(date):T', y=alt.Y('price:Q').aggregate(argmax='date'), text='symbol' ) # Create a text label text = label.mark_text(align='left', dx=4) # Create a circle annotation circle = label.mark_circle() # Draw the chart with all the layers combined line + circle + text ================================================ FILE: tests/examples_methods_syntax/line_with_log_scale.py ================================================ """ Line Chart with Logarithmic Scale --------------------------------- How to make a line chart on a `Logarithmic scale `_. 
""" # category: line charts import altair as alt from altair.datasets import data source = data.population() alt.Chart(source).mark_line().encode( x='year:O', y=alt.Y('sum(people)').scale(type="log") ) ================================================ FILE: tests/examples_methods_syntax/london_tube.py ================================================ """ London Tube Lines ================= This example shows the London tube lines against the background of the borough boundaries. It is based on the vega-lite example at https://vega.github.io/vega-lite/examples/geo_layer_line_london.html. """ # category: case studies import altair as alt from altair.datasets import data boroughs = alt.topo_feature(data.london_boroughs.url, 'boroughs') tubelines = alt.topo_feature(data.london_tube_lines.url, 'line') centroids = data.london_centroids.url background = alt.Chart(boroughs, width=700, height=500).mark_geoshape( stroke='white', strokeWidth=2 ).encode( color=alt.value('#eee'), ) labels = alt.Chart(centroids).mark_text().encode( longitude='cx:Q', latitude='cy:Q', text='bLabel:N', size=alt.value(8), opacity=alt.value(0.6) ).transform_calculate( "bLabel", "indexof (datum.name,' ') > 0 ? 
substring(datum.name,0,indexof(datum.name, ' ')) : datum.name" ) line_scale = alt.Scale(domain=["Bakerloo", "Central", "Circle", "District", "DLR", "Hammersmith & City", "Jubilee", "Metropolitan", "Northern", "Piccadilly", "Victoria", "Waterloo & City"], range=["rgb(137,78,36)", "rgb(220,36,30)", "rgb(255,206,0)", "rgb(1,114,41)", "rgb(0,175,173)", "rgb(215,153,175)", "rgb(106,114,120)", "rgb(114,17,84)", "rgb(0,0,0)", "rgb(0,24,168)", "rgb(0,160,226)", "rgb(106,187,170)"]) lines = alt.Chart(tubelines).mark_geoshape( filled=False, strokeWidth=2 ).encode( alt.Color('id:N') .title(None) .legend(orient='bottom-right', offset=0) .scale(line_scale) ) background + labels + lines ================================================ FILE: tests/examples_methods_syntax/maps_faceted_species.py ================================================ """ Faceted County-Level Choropleth Maps ------------------------------------ A set of maps arranged in a grid, each showing the distribution of a species' projected habitat across US counties. Each choropleth map uses color intensity to represent the percentage values within county boundaries. 
""" # category: maps import altair as alt from altair.datasets import data # Load species data with county_id as number csv_data = alt.UrlData(data.species.url, format=alt.CsvDataFormat(parse={'county_id': 'number'})) # Load US counties topology counties = alt.topo_feature(data.us_10m.url, 'counties') chart = alt.Chart(csv_data).mark_geoshape().encode( shape='geo:G', # Geographic shape encoding for map rendering color=alt.Color('habitat_yearround_pct:Q') .scale(domain=[0, 1], scheme='viridis', zero=True, nice=False) .title(['Suitable Habitat', '% of County']) .legend(format='.0%'), tooltip=[ alt.Tooltip('county_id:N').title('County ID'), alt.Tooltip('habitat_yearround_pct:Q').title('Habitat %').format('.2%') ], facet=alt.Facet('common_name:N', columns=2).title(None), ).transform_lookup( lookup='county_id', from_=alt.LookupData(data=counties, key='id'), as_='geo' # Join county geometry data ).project(type='albers').properties(width=300, height=200) # Display the chart chart ================================================ FILE: tests/examples_methods_syntax/mosaic_with_labels.py ================================================ """ Mosaic Chart with Labels ------------------------ """ # category: tables import altair as alt from altair.datasets import data source = data.cars() base = ( alt.Chart(source) .transform_aggregate(count_="count()", groupby=["Origin", "Cylinders"]) .transform_stack( stack="count_", as_=["stack_count_Origin1", "stack_count_Origin2"], offset="normalize", sort=[alt.SortField("Origin", "ascending")], groupby=[], ) .transform_window( x="min(stack_count_Origin1)", x2="max(stack_count_Origin2)", rank_Cylinders="dense_rank()", distinct_Cylinders="distinct(Cylinders)", groupby=["Origin"], frame=[None, None], sort=[alt.SortField("Cylinders", "ascending")], ) .transform_window( rank_Origin="dense_rank()", frame=[None, None], sort=[alt.SortField("Origin", "ascending")], ) .transform_stack( stack="count_", groupby=["Origin"], as_=["y", "y2"], 
offset="normalize", sort=[alt.SortField("Cylinders", "ascending")], ) .transform_calculate( ny="datum.y + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3", ny2="datum.y2 + (datum.rank_Cylinders - 1) * datum.distinct_Cylinders * 0.01 / 3", nx="datum.x + (datum.rank_Origin - 1) * 0.01", nx2="datum.x2 + (datum.rank_Origin - 1) * 0.01", xc="(datum.nx+datum.nx2)/2", yc="(datum.ny+datum.ny2)/2", ) ) rect = base.mark_rect().encode( x=alt.X("nx:Q").axis(None), x2="nx2", y="ny:Q", y2="ny2", color=alt.Color("Origin:N").legend(None), opacity=alt.Opacity("Cylinders:Q").legend(None), tooltip=["Origin:N", "Cylinders:Q"], ) text = base.mark_text(baseline="middle").encode( alt.X("xc:Q").axis(None), alt.Y("yc:Q").title("Cylinders"), text="Cylinders:N" ) mosaic = rect + text origin_labels = base.mark_text(baseline="middle", align="center").encode( alt.X("min(xc):Q").title("Origin").axis(orient="top"), alt.Color("Origin").legend(None), text="Origin", ) ( (origin_labels & mosaic) .resolve_scale(x="shared") .configure_view(stroke="") .configure_concat(spacing=10) .configure_axis(domain=False, ticks=False, labels=False, grid=False) ) ================================================ FILE: tests/examples_methods_syntax/multifeature_scatter_plot.py ================================================ """ Multifeature Scatter Plot ========================= This example shows how to make a scatter plot with multiple feature encodings. 
""" # category: scatter plots import altair as alt from altair.datasets import data source = data.penguins() alt.Chart(source).mark_circle().encode( alt.X('Flipper Length (mm)').scale(zero=False), alt.Y('Body Mass (g)').scale(zero=False, padding=1), alt.Size('Beak Depth (mm)').scale(zero=False), color='Species' ) ================================================ FILE: tests/examples_methods_syntax/multiline_highlight.py ================================================ """ Multi-Line Highlight ==================== This multi-line chart uses an invisible Voronoi tessellation to handle pointerover to identify the nearest point and then highlight the line on which the point falls. It is adapted from the Vega-Lite example found at https://bl.ocks.org/amitkaps/fe4238e716db53930b2f1a70d3401701 """ # category: interactive charts import altair as alt from altair.datasets import data source = data.stocks() highlight = alt.selection_point(on='pointerover', fields=['symbol'], nearest=True) base = alt.Chart(source).encode( x='date:T', y='price:Q', color='symbol:N' ) points = base.mark_circle().encode( opacity=alt.value(0) ).add_params( highlight ).properties( width=600 ) lines = base.mark_line().encode( size=alt.when(~highlight).then(alt.value(1)).otherwise(alt.value(3)) ) points + lines ================================================ FILE: tests/examples_methods_syntax/multiline_tooltip.py ================================================ """ Multi-Line Tooltip ================== This example shows how you can use selections and layers to create a tooltip-like behavior tied to the x position of the cursor. If you are looking for more standard tooltips, see the :ref:`gallery_multiline_tooltip_standard` example. In this example, we'll employ a little trick to isolate the x-position of the cursor: we add some transparent points with only an x encoding (no y encoding) and tie a *nearest* selection to these, tied to the "x" field. 
""" # category: interactive charts import altair as alt import pandas as pd import numpy as np np.random.seed(42) columns = ["A", "B", "C"] source = pd.DataFrame( np.cumsum(np.random.randn(100, 3), 0).round(2), columns=columns, index=pd.RangeIndex(100, name="x") ) source = source.reset_index().melt("x", var_name="category", value_name="y") # Create a selection that chooses the nearest point & selects based on x-value nearest = alt.selection_point(nearest=True, on="pointerover", fields=["x"], empty=False) # The basic line line = alt.Chart(source).mark_line(interpolate="basis").encode( x="x:Q", y="y:Q", color="category:N" ) # Transparent selectors across the chart. This is what tells us # the x-value of the cursor selectors = alt.Chart(source).mark_point().encode( x="x:Q", opacity=alt.value(0), ).add_params( nearest ) when_near = alt.when(nearest) # Draw points on the line, and highlight based on selection points = line.mark_point().encode( opacity=when_near.then(alt.value(1)).otherwise(alt.value(0)) ) # Draw text labels near the points, and highlight based on selection text = line.mark_text(align="left", dx=5, dy=-5).encode( text=when_near.then("y:Q").otherwise(alt.value(" ")) ) # Draw a rule at the location of the selection rules = alt.Chart(source).mark_rule(color="gray").encode( x="x:Q", ).transform_filter( nearest ) # Put the five layers into a chart and bind the data alt.layer( line, selectors, points, rules, text ).properties( width=600, height=300 ) ================================================ FILE: tests/examples_methods_syntax/multiline_tooltip_standard.py ================================================ """ Multi-Line Tooltip (Standard) ============================= This example shows how to add a standard tooltip to the same chart as in :ref:`gallery_multiline_tooltip`. You can find another example using this approach in the documentation on the :ref:`user-guide-pivot-transform` transformation. 
"""
# category: interactive charts
import altair as alt
import pandas as pd
import numpy as np

np.random.seed(42)
columns = ["A", "B", "C"]
source = pd.DataFrame(
    np.cumsum(np.random.randn(100, 3), 0).round(2),
    columns=columns, index=pd.RangeIndex(100, name="x"),
)
# Reshape to long form: one row per (x, category) pair with its value in "y"
source = source.reset_index().melt("x", var_name="category", value_name="y")

# Create a selection that chooses the nearest point & selects based on x-value
nearest = alt.selection_point(nearest=True, on="pointerover",
                              fields=["x"], empty=False)

# The basic line
line = alt.Chart(source).mark_line(interpolate="basis").encode(
    x="x:Q",
    y="y:Q",
    color="category:N"
)
when_near = alt.when(nearest)

# Draw points on the line, and highlight based on selection
points = line.mark_point().encode(
    opacity=when_near.then(alt.value(1)).otherwise(alt.value(0))
)

# Draw a rule at the location of the selection.
# The data is pivoted back to one row per x (one column per category) so that
# a single rule can list every category's value in its tooltip.
rules = alt.Chart(source).transform_pivot(
    "category",
    value="y",
    groupby=["x"]
).mark_rule(color="gray").encode(
    x="x:Q",
    opacity=when_near.then(alt.value(0.3)).otherwise(alt.value(0)),
    tooltip=[alt.Tooltip(c, type="quantitative") for c in columns],
).add_params(nearest)


# Put the three layers into a chart and bind the data
alt.layer(
    line, points, rules
).properties(
    width=600, height=300
)


================================================
FILE: tests/examples_methods_syntax/multiple_interactions.py
================================================
"""
Multiple Interactions
=====================
This example shows how multiple user inputs can be layered onto a chart.
The four inputs have functionality as follows: * Dropdown: Filters the movies by genre * Radio Buttons: Highlights certain films by Worldwide Gross * Mouse Drag and Scroll: Zooms the x and y scales to allow for panning * Checkbox: Scales the marker size of big budget films """ # category: interactive charts import altair as alt from altair.datasets import data movies = alt.UrlData( data.movies.url, format=alt.DataFormat(parse={'Release Date':'date'}) ) ratings = ['G', 'NC-17', 'PG', 'PG-13', 'R'] genres = [ 'Action', 'Adventure', 'Black Comedy', 'Comedy', 'Concert/Performance', 'Documentary', 'Drama', 'Horror', 'Musical', 'Romantic Comedy', 'Thriller/Suspense', 'Western' ] base = alt.Chart(movies, width=200, height=200).mark_point(filled=True).transform_calculate( Big_Budget_Film = "datum['Production Budget'] > 100000000 ? 'Yes' : 'No'", Release_Year = "year(datum['Release Date'])", ).transform_filter( alt.datum['IMDB Rating'] > 0 ).transform_filter( alt.FieldOneOfPredicate(field='MPAA Rating', oneOf=ratings) ).encode( x=alt.X('Worldwide Gross:Q').scale(domain=(100000,10**9), clamp=True), y='IMDB Rating:Q', tooltip='Title:N' ) # A slider filter year_slider = alt.binding_range(min=1969, max=2018, step=1, name='Release Year') slider_selection = alt.selection_point(bind=year_slider, fields=['Release_Year']) filter_year = base.add_params( slider_selection ).transform_filter( slider_selection ).properties(title='Slider Filtering') # A dropdown filter genre_dropdown = alt.binding_select(options=genres, name='Genre') genre_select = alt.selection_point(fields=['Major Genre'], bind=genre_dropdown) filter_genres = base.add_params( genre_select ).transform_filter( genre_select ).properties(title='Dropdown Filtering') # Color changing marks rating_radio = alt.binding_radio(options=ratings, name='Rating') rating_select = alt.selection_point(fields=['MPAA Rating'], bind=rating_radio) rating_color = ( alt.when(rating_select) .then(alt.Color('MPAA Rating:N').legend(None)) 
.otherwise(alt.value('lightgray')) ) highlight_ratings = base.add_params( rating_select ).encode( color=rating_color ).properties(title='Radio Button Highlighting') # Boolean selection for format changes input_checkbox = alt.binding_checkbox(name='Big Budget Films ') checkbox_selection = alt.param(bind=input_checkbox) size_checkbox = ( alt.when(checkbox_selection) .then(alt.Size('Big Budget Film:N').scale(range=[25, 150])) .otherwise(alt.value(25)) ) budget_sizing = base.add_params( checkbox_selection ).encode( size=size_checkbox ).properties(title='Checkbox Formatting') (filter_year | budget_sizing) & (highlight_ratings | filter_genres) ================================================ FILE: tests/examples_methods_syntax/natural_disasters.py ================================================ """ Global Deaths from Natural Disasters ------------------------------------ This example shows a proportional symbols visualization of deaths from natural disasters by year and type. """ # category: case studies import altair as alt from altair.datasets import data source = data.disasters.url alt.Chart(source).transform_filter( alt.datum.Entity != 'All natural disasters' ).mark_circle( opacity=0.8, stroke='black', strokeWidth=1, strokeOpacity=0.4 ).encode( alt.X('Year:T') .title(None) .scale(domain=['1899','2018']), alt.Y('Entity:N') .title(None) .sort(field="Deaths", op="sum", order='descending'), alt.Size('Deaths:Q') .scale(range=[0, 2500]) .title('Deaths') .legend(clipHeight=30, format='s'), alt.Color('Entity:N').legend(None), tooltip=[ "Entity:N", alt.Tooltip("Year:T", format='%Y'), alt.Tooltip("Deaths:Q", format='~s') ], ).properties( width=450, height=320, title=alt.Title( text="Global Deaths from Natural Disasters (1900-2017)", subtitle="The size of the bubble represents the total death count per year, by type of disaster", anchor='start' ) ).configure_axisY( domain=False, ticks=False, offset=10 ).configure_axisX( grid=False, ).configure_view( stroke=None ) 
================================================ FILE: tests/examples_methods_syntax/normalized_stacked_area_chart.py ================================================ """ Normalized Stacked Area Chart ----------------------------- This example shows how to make a normalized stacked area chart. """ # category: area charts import altair as alt from altair.datasets import data source = data.iowa_electricity() alt.Chart(source).mark_area().encode( x="year:T", y=alt.Y("net_generation:Q").stack("normalize"), color="source:N" ) ================================================ FILE: tests/examples_methods_syntax/normalized_stacked_bar_chart.py ================================================ """ Normalized Stacked Bar Chart ---------------------------- This is an example of a normalized stacked bar chart using data which contains crop yields over different regions and different years in the 1930s. """ # category: bar charts import altair as alt from altair.datasets import data source = data.barley() alt.Chart(source).mark_bar().encode( x=alt.X('sum(yield)').stack("normalize"), y='variety', color='site' ) ================================================ FILE: tests/examples_methods_syntax/pacman_chart.py ================================================ """ Pacman Chart ------------ Chart made using ``mark_arc`` and constant values. This could also be made using ``alt.Chart(source).mark_arc(color = "gold", theta = (5/8)*np.pi, theta2 = (19/8)*np.pi,radius=100)``. 
"""
# category: circular plots
import numpy as np
import altair as alt

# All three channels are constant datum values, so no data source is needed.
alt.Chart().mark_arc(color="gold").encode(
    theta=alt.ThetaDatum((5 / 8) * np.pi).scale(None),
    theta2=alt.Theta2Datum((19 / 8) * np.pi),
    radius=alt.RadiusDatum(100).scale(None),
)


================================================
FILE: tests/examples_methods_syntax/parallel_coordinates.py
================================================
"""
Parallel Coordinates
--------------------
A `Parallel Coordinates <https://en.wikipedia.org/wiki/Parallel_coordinates>`_
chart is a chart that lets you visualize the individual data points by drawing
a single line for each of them.
Such a chart can be created in Altair by first transforming the data into a
suitable representation.
This example shows a parallel coordinates chart with the Penguins dataset.
"""
# category: advanced calculations
import altair as alt
from altair.datasets import data

source = data.penguins()

# ``index`` numbers the rows before folding; it is used as the ``detail``
# channel below so that every original row is drawn as its own line.
alt.Chart(source, width=500).transform_window(
    index='count()'
).transform_fold(
    ['Beak Length (mm)', 'Beak Depth (mm)', 'Flipper Length (mm)']
).mark_line().encode(
    x='key:N',
    y='value:Q',
    color='Species:N',
    detail='index:N',
    opacity=alt.value(0.5)
)


================================================
FILE: tests/examples_methods_syntax/percentage_of_total.py
================================================
"""
Calculating Percentage of Total
-------------------------------
This chart demonstrates how to use a joinaggregate transform to display
data values as a percentage of total.
"""
# category: bar charts
import altair as alt
import pandas as pd

source = pd.DataFrame({
    'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'],
    'Time': [8, 2, 4, 8, 2]
})

# joinaggregate attaches the overall sum to every row, so the share can be
# computed per row in the following calculate step.
alt.Chart(source).transform_joinaggregate(
    TotalTime='sum(Time)',
).transform_calculate(
    PercentOfTotal="datum.Time / datum.TotalTime"
).mark_bar().encode(
    alt.X('PercentOfTotal:Q').axis(format='.0%'),
    y='Activity:N'
)


================================================
FILE: tests/examples_methods_syntax/pie_chart.py
================================================
"""
Pie Chart
---------
This example shows how to make a Pie Chart using ``mark_arc``.
This is adapted from a corresponding Vega-Lite Example:
`Pie Chart <https://vega.github.io/vega-lite/examples/arc_pie.html>`_.
"""
# category: circular plots
import pandas as pd
import altair as alt

source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]})

alt.Chart(source).mark_arc().encode(
    theta="value",
    color="category"
)


================================================
FILE: tests/examples_methods_syntax/pie_chart_with_labels.py
================================================
"""
Pie Chart with Labels
---------------------
This example shows how to layer text over arc marks (``mark_arc``) to label pie charts.
This is adapted from a corresponding Vega-Lite Example:
`Pie Chart with Labels <https://vega.github.io/vega-lite/examples/layer_arc_label.html>`_.
"""
# category: circular plots
import pandas as pd
import altair as alt

source = pd.DataFrame(
    {"category": ["a", "b", "c", "d", "e", "f"], "value": [4, 6, 10, 3, 7, 8]}
)

# Both layers derive from the same base so the arcs and their labels share
# one theta and color encoding.
base = alt.Chart(source).encode(
    alt.Theta("value:Q").stack(True),
    alt.Color("category:N").legend(None)
)

pie = base.mark_arc(outerRadius=120)
# The text radius (140) is larger than the arcs' outer radius (120).
text = base.mark_text(radius=140, size=20).encode(text="category:N")

pie + text


================================================
FILE: tests/examples_methods_syntax/polar_bar_chart.py
================================================
"""
Polar Bar Chart
---------------
This example shows how to make a polar bar chart using ``mark_arc``.
This could also have been called a "pie chart with axis labels", but is more commonly referred to as a polar bar chart. The axis lines are created using pie charts with only the stroke visible. """ # category: circular plots import math import altair as alt import pandas as pd source = pd.DataFrame({ "hour": range(24), "observations": [2, 2, 2, 2, 2, 3, 4, 4, 8, 8, 9, 7, 5, 6, 8, 8, 7, 7, 4, 3, 3, 2, 2, 2] }) polar_bars = alt.Chart(source).mark_arc(stroke='white', tooltip=True).encode( theta=alt.Theta("hour:O"), radius=alt.Radius('observations').scale(type='linear'), radius2=alt.datum(1), ) # Create the circular axis lines for the number of observations axis_rings = alt.Chart(pd.DataFrame({"ring": range(2, 11, 2)})).mark_arc(stroke='lightgrey', fill=None).encode( theta=alt.value(2 * math.pi), radius=alt.Radius('ring').stack(False) ) axis_rings_labels = axis_rings.mark_text(color='grey', radiusOffset=5, align='left').encode( text="ring", theta=alt.value(math.pi / 4) ) # Create the straight axis lines for the time of the day axis_lines = alt.Chart(pd.DataFrame({ "radius": 10, "theta": math.pi / 2, 'hour': ['00:00', '06:00', '12:00', '18:00'] })).mark_arc(stroke='lightgrey', fill=None).encode( theta=alt.Theta('theta').stack(True), radius=alt.Radius('radius'), radius2=alt.datum(1), ) axis_lines_labels = axis_lines.mark_text( color='grey', radiusOffset=5, thetaOffset=-math.pi / 4, # These adjustments could be left out with a larger radius offset, but they make the label positioning a bit cleaner align=alt.expr('datum.hour == "18:00" ? "right" : datum.hour == "06:00" ? "left" : "center"'), baseline=alt.expr('datum.hour == "00:00" ? "bottom" : datum.hour == "12:00" ? 
"top" : "middle"'), ).encode(text="hour") alt.layer( axis_rings, polar_bars, axis_rings_labels, axis_lines, axis_lines_labels, title=['Observations throughout the day', ''] ) ================================================ FILE: tests/examples_methods_syntax/poly_fit_regression.py ================================================ """ Polynomial Fit Plot with Regression Transform ============================================= This example shows how to overlay data with multiple fitted polynomials using the regression transform. """ # category: uncertainties and trends import numpy as np import pandas as pd import altair as alt # Generate some random data rng = np.random.RandomState(1) x = rng.rand(40) ** 2 y = 10 - 1.0 / (x + 0.1) + rng.randn(40) source = pd.DataFrame({"x": x, "y": y}) # Define the degree of the polynomial fits degree_list = [1, 3, 5] base = alt.Chart(source).mark_circle(color="black").encode( alt.X("x"), alt.Y("y") ) polynomial_fit = [ base.transform_regression( "x", "y", method="poly", order=order, as_=["x", str(order)] ) .mark_line() .transform_fold([str(order)], as_=["degree", "y"]) .encode(alt.Color("degree:N")) for order in degree_list ] alt.layer(base, *polynomial_fit) ================================================ FILE: tests/examples_methods_syntax/pyramid.py ================================================ """ Pyramid Pie Chart ----------------- Altair reproduction of http://robslink.com/SAS/democd91/pyramid_pie.htm """ # category: case studies import altair as alt import pandas as pd category = ['Sky', 'Shady side of a pyramid', 'Sunny side of a pyramid'] color = ["#416D9D", "#674028", "#DEAC58"] df = pd.DataFrame({'category': category, 'value': [75, 10, 15]}) alt.Chart(df, width=150, height=150).mark_arc(outerRadius=80).encode( alt.Theta('value:Q').scale(range=[2.356, 8.639]), alt.Color('category:N') .title(None) .scale(domain=category, range=color) .legend(orient='none', legendX=160, legendY=50), order='value:Q' ).configure_view( 
strokeOpacity=0 ) ================================================ FILE: tests/examples_methods_syntax/radial_chart.py ================================================ """ Radial Chart ------------ This radial plot uses both angular and radial extent to convey multiple dimensions of data. This is adapted from a corresponding Vega-Lite Example: `Radial Plot `_. """ # category: circular plots import pandas as pd import altair as alt source = pd.DataFrame({"values": [12, 23, 47, 6, 52, 19]}) base = alt.Chart(source).encode( alt.Theta("values:Q").stack(True), alt.Radius("values").scale(type="sqrt", zero=True, rangeMin=20), color="values:N", ) c1 = base.mark_arc(innerRadius=20, stroke="#fff") c2 = base.mark_text(radiusOffset=10).encode(text="values:Q") c1 + c2 ================================================ FILE: tests/examples_methods_syntax/ranged_dot_plot.py ================================================ """ Ranged Dot Plot --------------- This example shows a ranged dot plot to convey changing life expectancy for the five most populous countries (between 1955 and 2000). 
""" # category: advanced calculations import altair as alt from altair.datasets import data source = data.countries.url chart = ( alt.Chart(source) .encode(x="life_expect:Q", y="country:N") .transform_filter( alt.FieldOneOfPredicate( field="country", oneOf=["China", "India", "United States", "Indonesia", "Brazil"], ) ) .transform_filter(alt.FieldOneOfPredicate(field="year", oneOf=[1955, 2000])) ) line = chart.mark_line(color="#db646f").encode(detail="country:N") # Add points for life expectancy in 1955 & 2000 color = alt.Color("year:O").scale(domain=[1955, 2000], range=["#e6959c", "#911a24"]) points = ( chart.mark_point( size=100, opacity=1, filled=True, ) .encode(color=color) .interactive() ) (line + points) ================================================ FILE: tests/examples_methods_syntax/ridgeline_plot.py ================================================ """ Ridgeline plot -------------- A `Ridgeline plot `_ lets you visualize distribution of a numeric value for different subsets of data (what we call "facets" in Altair). Such a chart can be created in Altair by first transforming the data into a suitable representation. 
""" # category: distributions import altair as alt from altair.datasets import data source = data.seattle_weather.url step = 20 overlap = 1 alt.Chart(source, height=step).transform_timeunit( Month='month(date)' ).transform_joinaggregate( mean_temp='mean(temp_max)', groupby=['Month'] ).transform_bin( ['bin_max', 'bin_min'], 'temp_max' ).transform_aggregate( value='count()', groupby=['Month', 'mean_temp', 'bin_min', 'bin_max'] ).transform_impute( impute='value', groupby=['Month', 'mean_temp'], key='bin_min', value=0 ).mark_area( interpolate='monotone', fillOpacity=0.8, stroke='lightgray', strokeWidth=0.5 ).encode( alt.X('bin_min:Q') .bin('binned') .title('Maximum Daily Temperature (C)'), alt.Y('value:Q') .axis(None) .scale(range=[step, -step * overlap]), alt.Fill('mean_temp:Q') .legend(None) .scale(domain=[30, 5], scheme='redyellowblue') ).facet( row=alt.Row('Month:T') .title(None) .header(labelAngle=0, labelAlign='left', format='%B') ).properties( title='Seattle Weather', bounds='flush' ).configure_facet( spacing=0 ).configure_view( stroke=None ).configure_title( anchor='end' ) ================================================ FILE: tests/examples_methods_syntax/scatter_linked_table.py ================================================ """ Brushing Scatter Plot to Show Data on a Table --------------------------------------------- A scatter plot of the cars dataset, with data tables for horsepower, MPG, and origin. The tables update to reflect the selection on the scatter plot. 
""" # category: scatter plots import altair as alt from altair.datasets import data source = data.cars() # Brush for selection brush = alt.selection_interval() # Scatter Plot points = alt.Chart(source).mark_point().encode( x='Horsepower:Q', y='Miles_per_Gallon:Q', color=alt.when(brush).then(alt.value("steelblue")).otherwise(alt.value("grey")) ).add_params(brush) # Base chart for data tables ranked_text = alt.Chart(source).mark_text(align='right').encode( y=alt.Y('row_number:O').axis(None) ).transform_filter( brush ).transform_window( row_number='row_number()' ).transform_filter( alt.datum.row_number < 15 ) # Data Tables horsepower = ranked_text.encode(text='Horsepower:N').properties( title=alt.Title(text='Horsepower', align='right') ) mpg = ranked_text.encode(text='Miles_per_Gallon:N').properties( title=alt.Title(text='MPG', align='right') ) origin = ranked_text.encode(text='Origin:N').properties( title=alt.Title(text='Origin', align='right') ) text = alt.hconcat(horsepower, mpg, origin) # Combine data tables # Build chart alt.hconcat( points, text ).resolve_legend( color="independent" ).configure_view( stroke=None ) ================================================ FILE: tests/examples_methods_syntax/scatter_marginal_hist.py ================================================ """ Scatter Plot with Faceted Marginal Histograms --------------------------------------------- This example demonstrates how to generate a scatter plot, with faceted marginal histograms that share their respective x- and y-limits. 
"""
# category: distributions
import altair as alt
from altair.datasets import data

source = data.penguins()

base = alt.Chart(source)
base_bar = base.mark_bar(opacity=0.3, binSpacing=0)

# Fixed domains, reused both as the scatter plot's axis scales and as the
# bin extents of the marginal histograms.
xscale = alt.Scale(domain=(170, 235))
yscale = alt.Scale(domain=(2500, 6500))

points = base.mark_circle().encode(
    alt.X("Flipper Length (mm)").scale(xscale),
    alt.Y("Body Mass (g)").scale(yscale),
    color="Species",
)

top_hist = (
    base_bar
    .encode(
        alt.X("Flipper Length (mm):Q")
        # when using bins, the axis scale is set through
        # the bin extent, so we do not specify the scale here
        # (which would be ignored anyway)
        .bin(maxbins=20, extent=xscale.domain).stack(None).title(""),
        alt.Y("count()").stack(None).title(""),
        alt.Color("Species:N"),
    )
    .properties(height=60)
)

right_hist = (
    base_bar
    .encode(
        alt.Y("Body Mass (g):Q")
        .bin(maxbins=20, extent=yscale.domain)
        .stack(None)
        .title(""),
        alt.X("count()").stack(None).title(""),
        alt.Color("Species:N"),
    )
    .properties(width=60)
)

top_hist & (points | right_hist)


================================================
FILE: tests/examples_methods_syntax/scatter_point_paths_hover.py
================================================
"""
Scatter plot with point paths on hover with search box
======================================================
This example combines cross-sectional analysis (comparing countries at a single point in time)
with longitudinal analysis (tracking changes in individual countries over time), using
an interactive visualization technique inspired by
`this Vega example <https://vega.github.io/vega/examples/global-development/>`_.

Key features:

1. Point Paths. On hover, shows data trajectories using a trail mark that
   thickens from past to present, clearly indicating the direction of time.
2. Search Box. Implements a case-insensitive regex filter for country names,
   enabling dynamic, flexible data point selection to enhance exploratory analysis.
""" # category: interactive charts import altair as alt from altair.datasets import data # Data source source = data.gapminder.url # X-value slider x_slider = alt.binding_range(min=1955, max=2005, step=5, name='Year ') x_select = alt.selection_point(name="x_select", fields=['year'], bind=x_slider, value=1980) # Hover selection hover = alt.selection_point(on='mouseover', fields=['country'], empty=False) # A separate hover for the points since these need empty=True hover_point_opacity = alt.selection_point(on='mouseover', fields=['country']) # Search box for country name search_box = alt.param( value='', bind=alt.binding(input='search', placeholder="Country", name='Search ') ) # Base chart base = alt.Chart(source).encode( alt.X('fertility:Q').scale(zero=False).title('Babies per woman (total fertility rate)'), alt.Y('life_expect:Q').scale(zero=False).title('Life expectancy'), alt.Color('region:N').scale(scheme='dark2').legend(orient='bottom-left', titleFontSize=14, labelFontSize=12).title('Region'), alt.Detail('country:N') ).transform_calculate( region="""{ '0': 'South Asia', '1': 'Europe & Central Asia', '2': 'Sub-Saharan Africa', '3': 'The Americas', '4': 'East Asia & Pacific', '5': 'Middle East & North Africa' }[datum.cluster]""" ).transform_filter( # Exclude North Korea and South Korea due to source data error "datum.country !== 'North Korea' && datum.country !== 'South Korea'" ) search_matches = alt.expr.test(alt.expr.regexp(search_box, "i"), alt.datum.country) opacity = ( alt.when(hover_point_opacity, search_matches) .then(alt.value(0.8)) .otherwise(alt.value(0.1)) ) # Points that are always visible (filtered by slider and search) visible_points = base.mark_circle(size=100).encode( opacity=opacity ).transform_filter( x_select ).add_params( hover, hover_point_opacity, x_select ) when_hover = alt.when(hover) hover_line = alt.layer( # Line layer base.mark_trail().encode( alt.Order('year:Q').sort('ascending'), alt.Size('year:Q').scale(domain=[1955, 2005], range=[1, 
12]).legend(None), opacity=when_hover.then(alt.value(0.3)).otherwise(alt.value(0)), color=alt.value('#222222') ), # Point layer base.mark_point(size=50).encode( opacity=when_hover.then(alt.value(0.8)).otherwise(alt.value(0)), ) ) # Year labels year_labels = base.mark_text(align='left', dx=5, dy=-5, fontSize=14).encode( text='year:O', color=alt.value('#222222') ).transform_filter(hover) # Country labels country_labels = alt.Chart(source).mark_text( align='left', dx=-15, dy=-25, fontSize=18, fontWeight='bold' ).encode( x='fertility:Q', y='life_expect:Q', text='country:N', color=alt.value('black'), opacity=when_hover.then(alt.value(1)).otherwise(alt.value(0)) ).transform_window( rank='rank(life_expect)', sort=[alt.SortField('life_expect', order='descending')], groupby=['country'] # places label atop highest point on y-axis on hover ).transform_filter( alt.datum.rank == 1 ).transform_aggregate( life_expect='max(life_expect)', fertility='max(fertility)', groupby=['country'] ) background_year = alt.Chart(source).mark_text( baseline='middle', fontSize=96, opacity=0.2 ).encode( text='year:O' ).transform_filter( x_select ).transform_aggregate( year='max(year)' ) # Combine all layers chart = alt.layer( visible_points, year_labels, country_labels, hover_line, background_year ).properties( width=500, height=500, padding=10 # Padding ensures labels fit ).configure_axis( labelFontSize=12, titleFontSize=12 ).add_params(search_box) chart ================================================ FILE: tests/examples_methods_syntax/scatter_with_layered_histogram.py ================================================ """ Interactive Scatter Plot and Linked Layered Histogram ===================================================== This example shows how to link a scatter plot and a histogram together such that clicking on a point in the scatter plot will isolate the distribution corresponding to that point, and vice versa. 
""" # category: interactive charts import altair as alt import pandas as pd import numpy as np # generate fake data source = pd.DataFrame({ 'gender': ['M']*1000 + ['F']*1000, 'height':np.concatenate(( np.random.normal(69, 7, 1000), np.random.normal(64, 6, 1000) )), 'weight': np.concatenate(( np.random.normal(195.8, 144, 1000), np.random.normal(167, 100, 1000) )), 'age': np.concatenate(( np.random.normal(45, 8, 1000), np.random.normal(51, 6, 1000) )) }) selector = alt.selection_point(fields=['gender']) color_scale = alt.Scale(domain=['M', 'F'], range=['#1FC3AA', '#8624F5']) color = ( alt.when(selector) .then(alt.Color("gender:N").scale(color_scale)) .otherwise(alt.value("lightgray")) ) base = alt.Chart(source).properties( width=250, height=250 ).add_params(selector) points = base.mark_point(filled=True, size=200).encode( alt.X('mean(height):Q').scale(domain=[0,84]), alt.Y('mean(weight):Q').scale(domain=[0,250]), color=color, ) hists = base.mark_bar(opacity=0.5, thickness=100).encode( alt.X('age') .bin(step=5) # step keeps bin size the same .scale(domain=[0,100]), alt.Y('count()') .stack(None) .scale(domain=[0,350]), alt.Color('gender:N').scale(color_scale) ).transform_filter( selector ) points | hists ================================================ FILE: tests/examples_methods_syntax/scatter_with_minimap.py ================================================ """ Scatter Plot with Minimap ------------------------- This example shows how to create a miniature version of a plot such that creating a selection in the miniature version adjusts the axis limits in another, more detailed view. 
""" # category: scatter plots import altair as alt from altair.datasets import data source = data.seattle_weather() zoom = alt.selection_interval(encodings=["x", "y"]) minimap = ( alt.Chart(source) .mark_point() .add_params(zoom) .encode( x="date:T", y="temp_max:Q", color=alt.when(zoom).then("weather").otherwise(alt.value("lightgray")), ) .properties( width=200, height=200, title="Minimap -- click and drag to zoom in the detail view", ) ) detail = ( alt.Chart(source) .mark_point() .encode( alt.X("date:T").scale(domain={"param": zoom.name, "encoding": "x"}), alt.Y("temp_max:Q").scale(domain={"param": zoom.name, "encoding": "y"}), color="weather", ) .properties(width=600, height=400, title="Seattle weather -- detail view") ) detail | minimap ================================================ FILE: tests/examples_methods_syntax/scatter_with_rolling_mean.py ================================================ """ Scatter Plot with Rolling Mean ------------------------------ A scatter plot with a rolling mean overlay. In this example a 30 day window is used to calculate the mean of the maximum temperature around each date. """ # category: scatter plots import altair as alt from altair.datasets import data source = data.seattle_weather() line = alt.Chart(source).mark_line( color='red', size=3 ).transform_window( rolling_mean='mean(temp_max)', frame=[-15, 15] ).encode( x='date:T', y='rolling_mean:Q' ) points = alt.Chart(source).mark_point().encode( x='date:T', y=alt.Y('temp_max:Q').title('Max Temp') ) points + line ================================================ FILE: tests/examples_methods_syntax/seattle_weather_interactive.py ================================================ """ Seattle Weather Interactive =========================== This chart provides an interactive exploration of Seattle weather over the course of the year. It includes a one-axis brush selection to easily see the distribution of weather types in a particular date range. 
""" # category: case studies import altair as alt from altair.datasets import data source = data.seattle_weather() color = alt.Color('weather:N').scale( domain=['sun', 'fog', 'drizzle', 'rain', 'snow'], range=['#e7ba52', '#a7a7a7', '#aec7e8', '#1f77b4', '#9467bd'] ) # We create two selections: # - a brush that is active on the top panel # - a multi-click that is active on the bottom panel brush = alt.selection_interval(encodings=['x']) click = alt.selection_point(encodings=['color']) # Top panel is scatter plot of temperature vs time points = alt.Chart().mark_point().encode( alt.X('monthdate(date):T').title('Date'), alt.Y('temp_max:Q') .title('Maximum Daily Temperature (C)') .scale(domain=[-5, 40]), alt.Size('precipitation:Q').scale(range=[5, 200]), color=alt.when(brush).then(color).otherwise(alt.value("lightgray")), ).properties( width=550, height=300 ).add_params( brush ).transform_filter( click ) # Bottom panel is a bar chart of weather type bars = alt.Chart().mark_bar().encode( x='count()', y='weather:N', color=alt.when(click).then(color).otherwise(alt.value("lightgray")), ).transform_filter( brush ).properties( width=550, ).add_params( click ) alt.vconcat( points, bars, data=source, title="Seattle Weather: 2012-2015" ) ================================================ FILE: tests/examples_methods_syntax/select_detail.py ================================================ """ Selection Detail ================ This example shows a selection that links two views of data: the left panel contains one point per object, and the right panel contains one line per object. Clicking on either the points or lines will select the corresponding objects in both views of the data. The challenge lies in expressing such hierarchical data in a way that Altair can handle. We do this by merging the data into a "long form" dataframe, and aggregating identical metadata for the final plot. 
"""
# category: interactive charts
import altair as alt
import pandas as pd
import numpy as np

# Fixed seed so the generated example data is reproducible.
np.random.seed(0)

n_objects = 20
n_times = 50

# Create one (x, y) pair of metadata per object
locations = pd.DataFrame({
    'id': range(n_objects),
    'x': np.random.randn(n_objects),
    'y': np.random.randn(n_objects)
})

# Create a 50-element time-series for each object
timeseries = pd.DataFrame(np.random.randn(n_times, n_objects).cumsum(0),
                          columns=locations['id'],
                          index=pd.RangeIndex(0, n_times, name='time'))

# Melt the wide-form timeseries into a long-form view
timeseries = timeseries.reset_index().melt('time')

# Merge the (x, y) metadata into the long-form view
timeseries['id'] = timeseries['id'].astype(int)  # make merge not complain
data = pd.merge(timeseries, locations, on='id')

# Data is prepared, now make a chart

selector = alt.selection_point(fields=['id'])

# Selected ids keep their ordinal colour; everything else is drawn light gray.
color = (
    alt.when(selector)
    .then(alt.Color("id:O").legend(None))
    .otherwise(alt.value("lightgray"))
)

base = alt.Chart(data).properties(
    width=250,
    height=250
).add_params(selector)

points = base.mark_point(filled=True, size=200).encode(
    x='mean(x)',
    y='mean(y)',
    color=color,
)

line = base.mark_line().encode(
    x='time',
    y=alt.Y('value').scale(domain=(-15, 15)),
    color=alt.Color('id:O').legend(None)
).transform_filter(
    selector
)

points | line


================================================
FILE: tests/examples_methods_syntax/simple_scatter_with_errorbars.py
================================================
"""
Simple Scatter Plot with Errorbars
----------------------------------
A simple scatter plot of a data set with errorbars.
"""
# category: uncertainties and trends
import altair as alt
import pandas as pd
import numpy as np

# generate some data points with uncertainties
np.random.seed(0)
x = [1, 2, 3, 4, 5]
y = np.random.normal(10, 0.5, size=len(x))
yerr = 0.2

# set up data frame
source = pd.DataFrame({"x": x, "y": y, "yerr": yerr})

# the base chart
# (ymin / ymax are derived in the chart spec, not in pandas)
base = alt.Chart(source).transform_calculate(
    ymin="datum.y-datum.yerr",
    ymax="datum.y+datum.yerr"
)

# generate the points
points = base.mark_point(
    filled=True,
    size=50,
    color='black'
).encode(
    alt.X('x').scale(domain=(0, 6)),
    alt.Y('y').scale(zero=False)
)

# generate the error bars
errorbars = base.mark_errorbar().encode(
    x="x",
    y="ymin:Q",
    y2="ymax:Q"
)

points + errorbars


================================================
FILE: tests/examples_methods_syntax/sorted_error_bars_with_ci.py
================================================
"""
Sorted Error Bars showing Confidence Interval
=============================================
This example shows how to show error bars using confidence intervals, while
also sorting the y-axis based on x-axis values.
"""
# category: advanced calculations

import altair as alt
from altair.datasets import data

source = data.barley()

points = alt.Chart(source).mark_point(
    filled=True,
    color='black'
).encode(
    x=alt.X('mean(yield)').title('Barley Yield'),
    y=alt.Y('variety').sort(
        field='yield',
        op='mean',
        order='descending'
    )
).properties(
    width=400,
    height=250
)

# Derived from ``points`` so it inherits the y encoding (and its sort);
# only the x / x2 channels are replaced with the confidence-interval bounds.
error_bars = points.mark_rule().encode(
    x='ci0(yield)',
    x2='ci1(yield)',
)

points + error_bars


================================================
FILE: tests/examples_methods_syntax/stacked_bar_chart_sorted_segments.py
================================================
"""
Stacked Bar Chart with Sorted Segments
--------------------------------------
This is an example of a stacked-bar chart with the segments of each bar resorted.
"""
# category: bar charts
import altair as alt
from altair.datasets import data

source = data.barley()

alt.Chart(source).mark_bar().encode(
    x='sum(yield)',
    y='variety',
    color='site',
    order=alt.Order(
      # Sort the segments of the bars by this field
      'site',
      sort='ascending'
    )
)


================================================
FILE: tests/examples_methods_syntax/stacked_bar_chart_with_text.py
================================================
"""
Stacked Bar Chart with Text Overlay
===================================
This example shows how to overlay text on a stacked bar chart. For both the
bar and text marks, we use the ``stack`` argument in the ``x`` encoding to
cause the values to be stacked horizontally.
"""
# category: bar charts
import altair as alt
from altair.datasets import data

source=data.barley()

bars = alt.Chart(source).mark_bar().encode(
    x=alt.X('sum(yield):Q').stack('zero'),
    y=alt.Y('variety:N'),
    color=alt.Color('site')
)

# Same stacked x encoding as the bars; ``detail='site:N'`` splits the
# aggregate per site so each segment gets its own label.
text = alt.Chart(source).mark_text(dx=-15, dy=3, color='white').encode(
    x=alt.X('sum(yield):Q').stack('zero'),
    y=alt.Y('variety:N'),
    detail='site:N',
    text=alt.Text('sum(yield):Q', format='.1f')
)

bars + text


================================================
FILE: tests/examples_methods_syntax/stem_and_leaf.py
================================================
"""
Stem and Leaf Plot
------------------
This example shows how to make a stem and leaf plot.
"""
# category: advanced calculations
import altair as alt
import pandas as pd
import numpy as np
np.random.seed(42)

# Generating random data
# (values are truncated to int and then stringified so they can be sliced below)
source = pd.DataFrame({'samples': np.random.normal(50, 15, 100).astype(int).astype(str)})

# Splitting stem and leaf
# stem = every character except the last, leaf = the last character
source['stem'] = source['samples'].str[:-1]
source['leaf'] = source['samples'].str[-1]

source = source.sort_values(by=['stem', 'leaf'])

# Determining leaf position
# (1-based running index of each leaf within its stem)
source['position'] = source.groupby('stem').cumcount().add(1)

# Creating stem and leaf plot
alt.Chart(source).mark_text(
    align='left',
    baseline='middle',
    dx=-5
).encode(
    alt.X('position:Q')
        .title('')
        .axis(ticks=False, labels=False, grid=False),
    alt.Y('stem:N')
        .title('')
        .axis(tickSize=0),
    text='leaf:N',
).configure_axis(
    labelFontSize=20
).configure_text(
    fontSize=20
)


================================================
FILE: tests/examples_methods_syntax/streamgraph.py
================================================
"""
Streamgraph
-----------------
This example shows the streamgraph from vega-lite examples.
"""
# category: area charts
import altair as alt
from altair.datasets import data

source = data.unemployment_across_industries.url

alt.Chart(source).mark_area().encode(
    alt.X('yearmonth(date):T').axis(format='%Y', domain=False, tickSize=0),
    alt.Y('sum(count):Q').stack('center').axis(None),
    alt.Color('series:N').scale(scheme='category20b')
).interactive()


================================================
FILE: tests/examples_methods_syntax/strip_plot_jitter.py
================================================
"""
Strip Plot with Jitter
----------------------
In this chart, we encode the ``Major Genre`` column from the ``movies`` dataset
in the ``y``-channel. In the default presentation of this data, it would be
difficult to gauge the relative frequencies with which different values occur
because there would be so much overlap. To address this, we use the ``yOffset``
channel to incorporate a random offset (jittering).
The example is shown twice, on the left side using normally distributed and on the right side using uniformally distributed jitter. """ # category: distributions import altair as alt from altair.datasets import data source = data.movies.url gaussian_jitter = alt.Chart(source, title='Normally distributed jitter').mark_circle(size=8).encode( y="Major Genre:N", x="IMDB Rating:Q", yOffset="jitter:Q", color=alt.Color('Major Genre:N').legend(None) ).transform_calculate( # Generate Gaussian jitter with a Box-Muller transform jitter="sqrt(-2*log(random()))*cos(2*PI*random())" ) uniform_jitter = gaussian_jitter.transform_calculate( # Generate uniform jitter jitter='random()' ).encode( alt.Y('Major Genre:N').axis(None) ).properties( title='Uniformly distributed jitter' ) (gaussian_jitter | uniform_jitter).resolve_scale(yOffset='independent') ================================================ FILE: tests/examples_methods_syntax/top_k_items.py ================================================ """ Top K Items ----------- This example shows how to use the window and transformation filter to display the Top items of a long list of items in decreasing order. Here we sort the top 10 highest ranking movies of IMDB. """ # category: advanced calculations import altair as alt from altair.datasets import data source = data.movies.url # Top 10 movies by IMBD rating alt.Chart( source, ).mark_bar().encode( alt.X('Title:N').sort('-y'), alt.Y('IMDB Rating:Q'), alt.Color('IMDB Rating:Q') ).transform_window( rank='rank(IMDB Rating)', sort=[alt.SortField('IMDB Rating', order='descending')] ).transform_filter( (alt.datum.rank < 10) ) ================================================ FILE: tests/examples_methods_syntax/top_k_letters.py ================================================ """ Top K Letters ------------- This example shows how to use a window transform in order to display only the top K categories by number of entries. 
In this case, we rank the characters in the
first paragraph of Dickens' *A Tale of Two Cities* by number of occurrences.
"""
# category: advanced calculations
import altair as alt
import pandas as pd
import numpy as np

# Excerpt from A Tale of Two Cities; public domain text
text = """ It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only. """

# One row per alphabetic character; whitespace and punctuation are dropped.
# Case is preserved, so e.g. "I" and "i" are counted as different letters.
source = pd.DataFrame(
    {'letters': np.array([c for c in text if c.isalpha()])}
)

alt.Chart(source).transform_aggregate(
    count='count()',
    groupby=['letters']
).transform_window(
    rank='rank(count)',
    sort=[alt.SortField('count', order='descending')]
).transform_filter(
    # NOTE(review): rank() starts at 1, so this keeps ranks 1-9 (K = 9).
    alt.datum.rank < 10
).mark_bar().encode(
    y=alt.Y('letters:N').sort('-x'),
    x='count:Q',
)


================================================
FILE: tests/examples_methods_syntax/top_k_with_others.py
================================================
"""
Top-K Plot with Others
----------------------
This example shows how to use aggregate, window, and calculate transformations
to display the top-k directors by average worldwide gross while grouping the
remaining directors as 'All Others'.
"""
# category: advanced calculations
import altair as alt
from altair.datasets import data

source = data.movies.url

# Pipeline (the transforms are declared after ``encode`` but feed it):
#   1. average gross per director,
#   2. number the directors by that average, highest first,
#   3. relabel every director numbered 10 or above as 'All Others'.
# NOTE(review): ``datum.rank < 10`` keeps nine named directors.
alt.Chart(source).mark_bar().encode(
    alt.X("aggregate_gross:Q").aggregate("mean").title(None),
    alt.Y("ranked_director:N")
        .sort(op="mean", field="aggregate_gross", order="descending")
        .title(None)
).transform_aggregate(
    aggregate_gross='mean(Worldwide Gross)',
    groupby=["Director"],
).transform_window(
    rank='row_number()',
    sort=[alt.SortField("aggregate_gross", order="descending")],
).transform_calculate(
    ranked_director="datum.rank < 10 ? datum.Director : 'All Others'"
).properties(
    title="Top Directors by Average Worldwide Gross",
)


================================================
FILE: tests/examples_methods_syntax/us_employment.py
================================================
"""
The U.S. Employment Crash During the Great Recession
----------------------------------------------------
This example is a fully developed bar chart with negative values using the
sample dataset of U.S. employment changes during the Great Recession.
"""
# category: case studies
import altair as alt
import pandas as pd
from altair.datasets import data

source = data.us_employment()
presidents = pd.DataFrame([
    {
        "start": "2006-01-01",
        "end": "2009-01-19",
        "president": "Bush"
    },
    {
        "start": "2009-01-20",
        "end": "2015-12-31",
        "president": "Obama"
    }
])
# True for months with job growth; used below to pick the bar colour.
predicate = alt.datum.nonfarm_change > 0
bars = alt.Chart(
    source,
    title="The U.S.
employment crash during the Great Recession"
).mark_bar().encode(
    alt.X("month:T").title(""),
    alt.Y("nonfarm_change:Q").title("Change in non-farm employment (in thousands)"),
    color=alt.when(predicate).then(alt.value("steelblue")).otherwise(alt.value("orange")),
)

# A single vertical rule: only the "Bush" row survives the filter, and the
# rule is positioned at that row's ``end`` date.
rule = alt.Chart(presidents).mark_rule(
    color="black",
    strokeWidth=2
).encode(
    x='end:T'
).transform_filter(alt.datum.president == "Bush")

# President names, anchored at the start date of each term.
text = alt.Chart(presidents).mark_text(
    align='left',
    baseline='middle',
    dx=7,
    dy=-135,
    size=11
).encode(
    x='start:T',
    text='president',
    color=alt.value('#000000')
)

(bars + rule + text).properties(width=600)


================================================
FILE: tests/examples_methods_syntax/us_population_over_time.py
================================================
"""
US Population by Age and Sex
============================
This chart visualizes the age distribution of the US population over time.
It uses a slider widget that is bound to the year to visualize the age
distribution over time.
"""
# category: case studies
import altair as alt
from altair.datasets import data

source = data.population.url

# Point selection on ``year`` driven by a range slider; starts at 2000.
select_year = alt.selection_point(
    name="Year",
    fields=["year"],
    bind=alt.binding_range(min=1900, max=2000, step=10, name="Year"),
    value=2000,
)

alt.Chart(source).mark_bar().encode(
    alt.X("sex:N").title('').axis(labels=False, ticks=False),
    alt.Y("people:Q").scale(domain=(0, 12000000)).title("Population"),
    alt.Color("sex:N")
        .scale(domain=("Male", "Female"), range=["steelblue", "salmon"])
        .title("Sex"),
    alt.Column("age:O").title("Age")
).properties(
    width=20,
    title="U.S.
Population by Age and Sex"
).add_params(
    select_year
).transform_calculate(
    # Overwrite the numeric ``sex`` code with a readable label (1 -> "Male").
    "sex", alt.expr.if_(alt.datum.sex == 1, "Male", "Female")
).transform_filter(
    select_year
).configure_facet(
    spacing=8
)


================================================
FILE: tests/examples_methods_syntax/us_population_over_time_facet.py
================================================
"""
US Population: Wrapped Facet
============================
This chart visualizes the age distribution of the US population over time,
using a wrapped faceting of the data by decade.
"""
# category: case studies
import altair as alt
from altair.datasets import data

source = data.population.url

# ``columns(5)`` wraps the per-year facets after five panels.
alt.Chart(source).mark_area().encode(
    x='age:O',
    y=alt.Y('sum(people):Q').title('Population').axis(format='~s'),
    facet=alt.Facet('year:O').columns(5),
).properties(
    title='US Age Distribution By Year',
    width=90,
    height=80
)


================================================
FILE: tests/examples_methods_syntax/us_population_pyramid_over_time.py
================================================
'''
US Population Pyramid Over Time
===============================
A population pyramid shows the distribution of age groups within a population.
It uses a slider widget that is bound to the year to visualize the age
distribution over time.
'''
# category: case studies
import altair as alt
from altair.datasets import data

source = data.population.url

slider = alt.binding_range(min=1850, max=2000, step=10)
select_year = alt.selection_point(name='year', fields=['year'],
                                  bind=slider, value=2000)

# Shared base: filtered to the selected year, with a readable ``gender``
# label derived from the numeric ``sex`` code (1 -> 'Male').
base = alt.Chart(source).add_params(
    select_year
).transform_filter(
    select_year
).transform_calculate(
    gender=alt.expr.if_(alt.datum.sex == 1, 'Male', 'Female')
).properties(
    width=250
)


color_scale = alt.Scale(domain=['Male', 'Female'],
                        range=['#1f77b4', '#e377c2'])

# Left half of the pyramid: the x scale is sorted descending so the bars
# grow towards the left.
left = base.transform_filter(
    alt.datum.gender == 'Female'
).encode(
    alt.Y('age:O').axis(None),
    alt.X('sum(people):Q')
        .title('population')
        .sort('descending'),
    alt.Color('gender:N')
        .scale(color_scale)
        .legend(None)
).mark_bar().properties(title='Female')

# Narrow centre column that prints the age labels in place of a y axis.
middle = base.encode(
    alt.Y('age:O').axis(None),
    alt.Text('age:Q'),
).mark_text().properties(width=20)

right = base.transform_filter(
    alt.datum.gender == 'Male'
).encode(
    alt.Y('age:O').axis(None),
    alt.X('sum(people):Q').title('population'),
    alt.Color('gender:N').scale(color_scale).legend(None)
).mark_bar().properties(title='Male')

alt.concat(left, middle, right, spacing=5)


================================================
FILE: tests/examples_methods_syntax/us_state_capitals.py
================================================
"""
U.S. State Capitals Overlaid on a Map of the U.S
-------------------------------------------------
This is a layered geographic visualization that shows US capitals
overlaid on a map.
""" # category: case studies import altair as alt from altair.datasets import data states = alt.topo_feature(data.us_10m.url, 'states') capitals = data.us_state_capitals.url # US states background background = alt.Chart(states).mark_geoshape( fill='lightgray', stroke='white' ).properties( title='US State Capitols', width=650, height=400 ).project('albersUsa') # Points and text hover = alt.selection_point(on='pointerover', nearest=True, fields=['lat', 'lon']) base = alt.Chart(capitals).encode( longitude='lon:Q', latitude='lat:Q', ) text = base.mark_text(dy=-5, align='right').encode( alt.Text('city:N'), opacity=alt.when(~hover).then(alt.value(0)).otherwise(alt.value(1)) ) points = base.mark_point().encode( color=alt.value('black'), size=alt.when(~hover).then(alt.value(30)).otherwise(alt.value(100)) ).add_params(hover) background + points + text ================================================ FILE: tests/examples_methods_syntax/violin_plot.py ================================================ """ Violin Plot ----------- This example shows how to make a Violin Plot using Altair's density transform. 
"""
# category: distributions
import altair as alt
from altair.datasets import data

# One density curve per ``Origin``; stacking the density around the centre
# is what mirrors each curve into a violin shape.
alt.Chart(data.cars(), width=100).transform_density(
    'Miles_per_Gallon',
    as_=['Miles_per_Gallon', 'density'],
    extent=[5, 50],
    groupby=['Origin']
).mark_area(orient='horizontal').encode(
    alt.X('density:Q')
        .stack('center')
        .impute(None)
        .title(None)
        .axis(labels=False, values=[0], grid=False, ticks=True),
    alt.Y('Miles_per_Gallon:Q'),
    alt.Color('Origin:N'),
    alt.Column('Origin:N')
        .spacing(0)
        .header(titleOrient='bottom', labelOrient='bottom', labelPadding=0)
).configure_view(
    stroke=None
)


================================================
FILE: tests/examples_methods_syntax/wheat_wages.py
================================================
"""
Wheat and Wages
---------------
A recreation of William Playfair's classic chart visualizing
the price of wheat, the wages of a mechanic, and the reigning British monarch.

This is a more polished version of the simpler chart in :ref:`gallery_bar_and_line_with_dual_axis`.
"""
# category: case studies
import altair as alt
import pandas as pd
from altair.datasets import data


# ``year_end`` is the x2 edge of each wheat bar: the (numeric) year plus 5.
base_wheat = alt.Chart(data.wheat.url).transform_calculate(year_end="+datum.year + 5")

base_monarchs = alt.Chart(data.monarchs.url).transform_calculate(
    offset="((!datum.commonwealth && datum.index % 2) ? -1: 1) * 2 + 95",
    off2="((!datum.commonwealth && datum.index % 2) ?
-1: 1) + 95",
    y="95",
    x="+datum.start + (+datum.end - +datum.start)/2",
)

bars = base_wheat.mark_bar(fill="#aaa", stroke="#999").encode(
    alt.X("year:Q").bin("binned").axis(format="d", tickCount=5).scale(zero=False),
    alt.Y("wheat:Q").axis(zindex=1),
    alt.X2("year_end"),
)

# Vertical guide rules every fifty years.
section_data = pd.DataFrame(
    [
        {"year": 1600},
        {"year": 1650},
        {"year": 1700},
        {"year": 1750},
        {"year": 1800},
    ]
)

section_line = (
    alt.Chart(section_data)
    .mark_rule(stroke="#000", strokeWidth=0.6, opacity=0.7)
    .encode(alt.X("year"))
)

# Wages area plus two outline strokes derived from the same chart.
area = base_wheat.mark_area(color="#a4cedb", opacity=0.7).encode(
    alt.X("year:Q"), alt.Y("wages:Q")
)
area_line_1 = area.mark_line(color="#000", opacity=0.7)
area_line_2 = area.mark_line(yOffset=-2, color="#EE8182")

# Monarch reigns drawn as a band of bars along the top of the chart.
top_bars = base_monarchs.mark_bar(stroke="#000").encode(
    alt.X("start:Q"),
    alt.X2("end"),
    alt.Y("y:Q"),
    alt.Y2("offset"),
    alt.Fill("commonwealth:N").legend(None).scale(range=["black", "white"]),
)

top_text = base_monarchs.mark_text(yOffset=14, fontSize=9, fontStyle="italic").encode(
    alt.X("x:Q"), alt.Y("off2:Q"), alt.Text("name:N")
)

(
    (bars + section_line + area + area_line_1 + area_line_2 + top_bars + top_text)
    .properties(width=900, height=400)
    .configure_axis(title=None, gridColor="white", gridOpacity=0.25, domain=False)
    .configure_view(stroke="transparent")
)


================================================
FILE: tests/examples_methods_syntax/wilkinson-dot-plot.py
================================================
"""
Wilkinson Dot Plot
------------------
An example of a `Wilkinson Dot Plot `_
"""
# category: advanced calculations

import altair as alt
import pandas as pd

source = pd.DataFrame(
    {"data":[1,1,1,1,1,1,1,1,1,1,
             2,2,2,
             3,3,
             4,4,4,4,4,4]
    }
)

# ``id`` is computed per ``data`` value (window grouped by 'data') and is
# used as the vertical position of each dot.
alt.Chart(source, height=100).mark_circle(opacity=1).transform_window(
    id='rank()',
    groupby=['data']
).encode(
    alt.X('data:O'),
    alt.Y('id:O').axis(None).sort('descending')
)


================================================
FILE: tests/examples_methods_syntax/wind_vector_map.py
================================================
"""
Wind Vector Map
---------------
An example of a vector array map showing wind speed and direction, using
``wedge`` as shape for ``mark_point`` and ``angle`` encoding for the wind direction.
This is adapted from this corresponding Vega-Lite Example:
`Wind Vector Map `_ with an added base map.
"""
# category: maps
import altair as alt
from altair.datasets import data

df_wind = data.windvectors()
data_world = alt.topo_feature(data.world_110m.url, "countries")

# ``dir`` drives both the colour and the rotation of each wedge;
# ``speed`` drives its size.
wedge = alt.Chart(df_wind).mark_point(shape="wedge", filled=True).encode(
    alt.Latitude("latitude"),
    alt.Longitude("longitude"),
    alt.Color("dir")
        .scale(domain=[0, 360], scheme="rainbow")
        .legend(None),
    alt.Angle("dir").scale(domain=[0, 360], range=[180, 540]),
    alt.Size("speed").scale(rangeMax=500)
).project("equalEarth")

# Bounding box of the wind observations.
xmin, xmax, ymin, ymax = (
    df_wind.longitude.min(),
    df_wind.longitude.max(),
    df_wind.latitude.min(),
    df_wind.latitude.max(),
)

# extent as feature or featurecollection
extent = {
    "type": "Feature",
    "geometry": {"type": "Polygon",
                 "coordinates": [[
                     [xmax, ymax],
                     [xmax, ymin],
                     [xmin, ymin],
                     [xmin, ymax],
                     [xmax, ymax]]]
                },
    "properties": {}
}

# use fit combined with clip=True
base = (
    alt.Chart(data_world)
    .mark_geoshape(clip=True, fill="lightgray", stroke="black", strokeWidth=0.5)
    .project(type="equalEarth", fit=extent)
)

base + wedge


================================================
FILE: tests/examples_methods_syntax/window_rank.py
================================================
"""
Window Rank Line Chart
----------------------
This example shows the Group F rankings in the 2018 World Cup after each matchday.
A window transformation is used to rank each team after each match day,
sorting by points and difference.
"""
# category: line charts
import altair as alt
import pandas as pd

source = pd.DataFrame(
    [
        {"team": "Germany", "matchday": 1, "point": 0, "diff": -1},
        {"team": "Germany", "matchday": 2, "point": 3, "diff": 0},
        {"team": "Germany", "matchday": 3, "point": 3, "diff": -2},
        {"team": "Mexico", "matchday": 1, "point": 3, "diff": 1},
        {"team": "Mexico", "matchday": 2, "point": 6, "diff": 2},
        {"team": "Mexico", "matchday": 3, "point": 6, "diff": -1},
        {"team": "South Korea", "matchday": 1, "point": 0, "diff": -1},
        {"team": "South Korea", "matchday": 2, "point": 0, "diff": -2},
        {"team": "South Korea", "matchday": 3, "point": 3, "diff": 0},
        {"team": "Sweden", "matchday": 1, "point": 3, "diff": 1},
        {"team": "Sweden", "matchday": 2, "point": 3, "diff": 0},
        {"team": "Sweden", "matchday": 3, "point": 6, "diff": 3},
    ]
)

color_scale = alt.Scale(
    domain=["Germany", "Mexico", "South Korea", "Sweden"],
    range=["#000000", "#127153", "#C91A3C", "#0C71AB"],
)

# Rank is computed separately for each matchday, ordering by points first
# and goal difference second (both descending).
alt.Chart(source).mark_line().encode(
    x="matchday:O", y="rank:O", color=alt.Color("team:N").scale(color_scale)
).transform_window(
    rank="rank()",
    sort=[
        alt.SortField("point", order="descending"),
        alt.SortField("diff", order="descending"),
    ],
    groupby=["matchday"],
).properties(title="World Cup 2018: Group F Rankings")


================================================
FILE: tests/expr/__init__.py
================================================


================================================
FILE: tests/expr/test_expr.py
================================================
from __future__ import annotations

import datetime as dt
import operator
import sys
from inspect import classify_class_attrs, getmembers, signature
from typing import TYPE_CHECKING, Any, TypeVar, cast

import numpy as np
import pytest
from jsonschema.exceptions import ValidationError

from altair import datum, expr, ExprRef
from altair.expr import _ExprMeta
from altair.expr.core import Expression, GetAttrExpression

if TYPE_CHECKING:
    from collections.abc import Callable,
Iterable, Iterator from inspect import _IntrospectableCallable T = TypeVar("T") # This maps vega expression function names to the Python name VEGA_REMAP = {"if_": "if"} def _is_property(obj: Any, /) -> bool: return isinstance(obj, property) def _get_property_names(tp: type[Any], /) -> Iterator[str]: for nm, _ in getmembers(tp, _is_property): yield nm def signature_n_params( obj: _IntrospectableCallable, /, *, exclude: Iterable[str] = frozenset(("cls", "self")), ) -> int: sig = signature(obj) return len(set(sig.parameters).difference(exclude)) def _iter_classmethod_specs( tp: type[T], / ) -> Iterator[tuple[str, Callable[..., Expression], int]]: for m in classify_class_attrs(tp): if m.kind == "class method" and m.defining_class is tp: name = m.name fn = cast("classmethod[T, ..., Expression]", m.object).__func__ yield (VEGA_REMAP.get(name, name), fn.__get__(tp), signature_n_params(fn)) def test_unary_operations(): OP_MAP = {"-": operator.neg, "+": operator.pos} for op, func in OP_MAP.items(): z = func(datum.xxx) assert repr(z) == f"({op}datum.xxx)" def test_binary_operations(): OP_MAP = { "+": operator.add, "-": operator.sub, "*": operator.mul, "/": operator.truediv, "%": operator.mod, "===": operator.eq, "<": operator.lt, "<=": operator.le, ">": operator.gt, ">=": operator.ge, "!==": operator.ne, "&&": operator.and_, "||": operator.or_, } # When these are on the RHS, the opposite is evaluated instead. 
# Maps each comparison to the operator that shows up in the repr when the datum is the right-hand operand.
    INEQ_REVERSE = {
        ">": "<",
        "<": ">",
        "<=": ">=",
        ">=": "<=",
        "===": "===",
        "!==": "!==",
    }
    for op, func in OP_MAP.items():
        # datum on the left, literal on the right
        z1 = func(datum.xxx, 2)
        assert repr(z1) == f"(datum.xxx {op} 2)"

        # literal on the left: comparisons come back mirrored with the datum first
        z2 = func(2, datum.xxx)
        if op in INEQ_REVERSE:
            assert repr(z2) == f"(datum.xxx {INEQ_REVERSE[op]} 2)"
        else:
            assert repr(z2) == f"(2 {op} datum.xxx)"

        # datum on both sides
        z3 = func(datum.xxx, datum.yyy)
        assert repr(z3) == f"(datum.xxx {op} datum.yyy)"


def test_abs():
    """``abs()`` on a datum attribute renders as an ``abs(...)`` call."""
    z = abs(datum.xxx)
    assert repr(z) == "abs(datum.xxx)"


@pytest.mark.parametrize(("veganame", "fn", "n_params"), _iter_classmethod_specs(expr))
def test_expr_methods(
    veganame: str, fn: Callable[..., Expression], n_params: int
) -> None:
    """Call each ``expr`` classmethod with ``n_params`` datum fields and check its repr."""
    datum_names = [f"col_{n}" for n in range(n_params)]
    datum_args = ",".join(f"datum.{nm}" for nm in datum_names)

    fn_call = fn(*(GetAttrExpression("datum", nm) for nm in datum_names))
    assert repr(fn_call) == f"{veganame}({datum_args})"


@pytest.mark.parametrize("constname", _get_property_names(_ExprMeta))
def test_expr_consts(constname: str):
    """Test all constants defined in expr.consts."""
    # The constants are enumerated as the properties found on ``_ExprMeta``.
    const = getattr(expr, constname)
    z = const * datum.xxx
    assert repr(z) == f"({constname} * datum.xxx)"


@pytest.mark.parametrize("constname", _get_property_names(_ExprMeta))
def test_expr_consts_immutable(constname: str):
    """Ensure e.g `alt.expr.PI = 2` is prevented."""
    # The AttributeError message matched here differs from Python 3.11 onwards.
    if sys.version_info >= (3, 11):
        pattern = f"property {constname!r}.+has no setter"
    else:
        pattern = f"can't set attribute {constname!r}"
    with pytest.raises(AttributeError, match=pattern):
        setattr(expr, constname, 2)


def test_json_reprs():
    """Test JSON representations of special values."""
    assert repr(datum.xxx == None) == "(datum.xxx === null)"  # noqa: E711
    assert repr(datum.xxx == False) == "(datum.xxx === false)"  # noqa: E712
    assert repr(datum.xxx == True) == "(datum.xxx === true)"  # noqa: E712
    assert repr(datum.xxx == np.int64(0)) == "(datum.xxx === 0)"


def test_to_dict():
    """``to_dict()`` of an expression equals its ``repr``."""
    ex = datum.xxx * 2 > datum.yyy
    assert ex.to_dict() == repr(ex)


def test_copy():
    ex =
datum.xxx * 2 > abs(datum.yyy)
    ex_copy = ex.copy()
    # The copy must serialise identically to the original.
    assert ex.to_dict() == ex_copy.to_dict()


def test_datum_getattr():
    """Item access renders with brackets; the ``__magic__`` lookup raises ``AttributeError``."""
    x = datum["foo"]
    assert repr(x) == "datum['foo']"

    magic_attr = "__magic__"
    with pytest.raises(AttributeError):
        getattr(datum, magic_attr)


def test_expression_getitem():
    """Indexing an expression renders as ``[...]`` appended to it."""
    x = datum.foo[0]
    assert repr(x) == "datum.foo[0]"


def test_expression_function_expr():
    # test including an expr. should return an ExprRef
    er = expr(expr.PI * 2)
    assert isinstance(er, ExprRef)
    assert repr(er) == "ExprRef({\n expr: (PI * 2)\n})"


def test_expression_function_string():
    # expr() can only work with str
    er = expr("2 * 2")
    assert isinstance(er, ExprRef)
    assert repr(er) == "ExprRef({\n expr: '2 * 2'\n})"


def test_expression_function_nostring():
    # expr() can only work with str otherwise
    # should raise a SchemaValidationError
    # (the test catches jsonschema's ``ValidationError``)
    with pytest.raises(ValidationError):
        expr(2 * 2)  # pyright: ignore

    with pytest.raises(ValidationError):
        expr(["foo", "bah"])  # pyright: ignore


# In the expected strings the month is zero-based (January -> 0, May -> 4)
# and the 2999 microseconds appear as 2 in the final field.
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        (dt.date(2000, 1, 1), "datetime(2000,0,1)"),
        (dt.datetime(2000, 1, 1), "datetime(2000,0,1,0,0,0,0)"),
        (dt.datetime(2001, 1, 1, 9, 30, 0, 2999), "datetime(2001,0,1,9,30,0,2)"),
        (
            dt.datetime(2003, 5, 1, 1, 30, tzinfo=dt.timezone.utc),
            "utc(2003,4,1,1,30,0,0)",
        ),
    ],
    ids=["date", "datetime (no time)", "datetime (microseconds)", "datetime (UTC)"],
)
def test_expr_datetime(value: Any, expected: str) -> None:
    """Comparing a datum with a date/datetime embeds a ``datetime(...)`` / ``utc(...)`` call."""
    r_datum = datum.date >= value
    assert isinstance(r_datum, Expression)
    assert repr(r_datum) == f"(datum.date >= {expected})"


# Every case is a fixed-offset timezone that is not ``dt.timezone.utc``; the
# last two carry a "UTC"/"utc" name but a non-zero offset.
@pytest.mark.parametrize(
    "tzinfo",
    [
        dt.timezone(dt.timedelta(hours=2), "UTC+2"),
        dt.timezone(dt.timedelta(hours=1), "BST"),
        dt.timezone(dt.timedelta(hours=-7), "pdt"),
        dt.timezone(dt.timedelta(hours=-3), "BRT"),
        dt.timezone(dt.timedelta(hours=9), "UTC"),
        dt.timezone(dt.timedelta(minutes=60), "utc"),
    ],
)
def test_expr_datetime_unsupported_timezone(tzinfo: dt.timezone) -> None:
    datetime = dt.datetime(2003, 5, 1, 1, 30)

    result = datum.date ==
datetime
    # The naive datetime is accepted ...
    assert repr(result) == "(datum.date === datetime(2003,4,1,1,30,0,0))"

    # ... but the same value with one of the parametrized timezones must raise.
    with pytest.raises(TypeError, match=r"Unsupported timezone.+\n.+UTC.+local"):
        datum.date == datetime.replace(tzinfo=tzinfo)  # noqa: B015


================================================
FILE: tests/test_datasets.py
================================================
from __future__ import annotations

import datetime as dt
import re
import sys
from functools import partial
from importlib import import_module
from importlib.util import find_spec
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast, get_args
from urllib.error import URLError

import pytest
from narwhals.stable import v1 as nw
from narwhals.stable.v1 import dependencies as nw_dep

from altair.datasets import Loader
from altair.datasets._exceptions import AltairDatasetsError
from altair.datasets._typing import Dataset, Metadata
from tests import no_xdist, skip_requires_geopandas, skip_requires_pyarrow

if TYPE_CHECKING:
    from collections.abc import Mapping
    from pathlib import Path
    from typing import Literal, TypeAlias

    import pandas as pd
    import polars as pl
    from _pytest.mark import ParameterSet  # pyright: ignore[reportPrivateImportUsage]

    from altair.datasets._reader import _Backend, _PandasAny, _Polars, _PyArrow
    from altair.vegalite.v6.schema._typing import OneOrSeq

    PolarsLoader: TypeAlias = Loader[pl.DataFrame, pl.LazyFrame]

# =============================================================================
# Test Configuration and Fixtures
# =============================================================================

datasets_debug: pytest.MarkDecorator = pytest.mark.datasets_debug()
"""
Custom ``pytest.mark`` decorator.

Use for more exhaustive tests that require many requests.

**Disabled** by default in ``pyproject.toml``:

    [tool.pytest.ini_options]
    addopts = ...
"""

# One ``pytest.param`` per backend name; the two pyarrow-dependent entries
# carry the ``skip_requires_pyarrow()`` mark.
_backend_params: Mapping[_Backend, ParameterSet] = {
    "polars": pytest.param("polars"),
    "pandas": pytest.param("pandas"),
    "pandas[pyarrow]": pytest.param("pandas[pyarrow]", marks=skip_requires_pyarrow()),
    "pyarrow": pytest.param("pyarrow", marks=skip_requires_pyarrow()),
}

# Parametrize decorators over all backends, or a subset filtered by name.
backends: pytest.MarkDecorator = pytest.mark.parametrize(
    "backend", _backend_params.values()
)
backends_no_polars: pytest.MarkDecorator = pytest.mark.parametrize(
    "backend", [v for k, v in _backend_params.items() if k != "polars"]
)
backends_pandas_any: pytest.MarkDecorator = pytest.mark.parametrize(
    "backend", [v for k, v in _backend_params.items() if "pandas" in k]
)
backends_pyarrow: pytest.MarkDecorator = pytest.mark.parametrize(
    "backend", [v for k, v in _backend_params.items() if k == "pyarrow"]
)

# Parametrize decorators over dataset names.
datasets_all: pytest.MarkDecorator = pytest.mark.parametrize("name", get_args(Dataset))
datasets_spatial: pytest.MarkDecorator = pytest.mark.parametrize(
    "name",
    ["earthquakes", "london_boroughs", "london_tube_lines", "us_10m", "world_110m"],
)

CACHE_ENV_VAR: Literal["ALTAIR_DATASETS_DIR"] = "ALTAIR_DATASETS_DIR"


@pytest.fixture(scope="session")
def polars_loader() -> PolarsLoader:
    """Fastest and **most reliable** backend."""
    load = Loader.from_backend("polars")
    # If the cache reports itself as not active, point it at its XDG location.
    if load.cache.is_not_active():
        load.cache.path = load.cache._XDG_CACHE
    return load


@pytest.fixture
def metadata_columns() -> frozenset[str]:
    """Union of the required and optional keys declared on ``Metadata``."""
    return Metadata.__required_keys__.union(Metadata.__optional_keys__)


# =============================================================================
# Utility Functions
# =============================================================================


def is_frame_backend(frame: Any, backend: _Backend, /) -> bool:
    """
    Check that ``frame`` is the dataframe type that ``backend`` is expected to produce.

    Both pandas variants map to a pandas ``DataFrame``, ``"pyarrow"`` to a
    pyarrow ``Table`` and ``"polars"`` to a polars ``DataFrame``.

    Raises
    ------
    TypeError
        If ``backend`` is not one of the names handled above.
    """
    pandas_any: set[_PandasAny] = {"pandas", "pandas[pyarrow]"}
    if backend in pandas_any:
        return nw_dep.is_pandas_dataframe(frame)
    elif backend == "pyarrow":
        return nw_dep.is_pyarrow_table(frame)
    elif backend == "polars":
        return nw_dep.is_polars_dataframe(frame)
    else:
        raise TypeError(backend)
def is_loader_backend(loader: Loader[Any, Any], backend: _Backend, /) -> bool:
    """Return whether ``repr(loader)`` has the form ``<ClassName>[<backend>]``."""
    expected = f"{type(loader).__name__}[{backend}]"
    return repr(loader) == expected


def is_polars_backed_pyarrow(loader: Loader[Any, Any], /) -> bool:
    """
    User requested ``pyarrow``, but also has ``polars`` installed.

    Both support nested datatypes, which are required for spatial json.
    """
    if not is_loader_backend(loader, "pyarrow"):
        return False
    return "earthquakes" in loader._reader.profile()["supported"]


def is_geopandas_backed_pandas(loader: Loader[Any, Any], /) -> bool:
    """Return whether a ``pandas``-flavoured loader lists ``earthquakes`` as supported."""
    is_pandas = is_loader_backend(loader, "pandas") or is_loader_backend(
        loader, "pandas[pyarrow]"
    )
    return is_pandas and "earthquakes" in loader._reader.profile()["supported"]


# =============================================================================
# Backend and Loader Tests
# =============================================================================


@backends
def test_metadata_columns(backend: _Backend, metadata_columns: frozenset[str]) -> None:
    """Ensure all backends will query the same column names."""
    load = Loader.from_backend(backend)
    metadata = load._reader._scan_metadata().collect()
    assert set(metadata.columns) == metadata_columns


@backends
def test_loader_from_backend(backend: _Backend) -> None:
    """Ensure ``Loader.from_backend`` reports the requested backend in its ``repr``."""
    load = Loader.from_backend(backend)
    assert is_loader_backend(load, backend)


@backends
def test_loader_url(backend: _Backend) -> None:
    """Ensure ``Loader.url`` returns a ``str`` that mentions ``vega-datasets``."""
    load = Loader.from_backend(backend)
    url = load.url("volcano")
    assert isinstance(url, str)
    assert "vega-datasets" in url


@no_xdist
def test_load_infer_priority(monkeypatch: pytest.MonkeyPatch) -> None:
    """
    Ensure the **most reliable**, available backend is selected.

    Each step removes the cached ``load`` attribute, hides one more module via
    ``sys.modules`` and re-imports ``load``, so the statement order matters.

    See Also
    --------
    ``altair.datasets._reader.infer_backend``
    """
    import altair.datasets._loader
    from altair.datasets import load

    assert is_loader_backend(load, "polars")
    monkeypatch.delattr(altair.datasets._loader, "load", raising=False)
    monkeypatch.setitem(sys.modules, "polars", None)

    from altair.datasets import load

    if find_spec("pyarrow") is None:
        # NOTE: We can end the test early for the CI job that removes `pyarrow`
        assert is_loader_backend(load, "pandas")
        monkeypatch.delattr(altair.datasets._loader, "load")
        monkeypatch.setitem(sys.modules, "pandas", None)
        with pytest.raises(AltairDatasetsError, match=r"no.+backend"):
            from altair.datasets import load
    else:
        assert is_loader_backend(load, "pandas[pyarrow]")
        monkeypatch.delattr(altair.datasets._loader, "load")
        monkeypatch.setitem(sys.modules, "pyarrow", None)

        from altair.datasets import load

        assert is_loader_backend(load, "pandas")
        monkeypatch.delattr(altair.datasets._loader, "load")
        monkeypatch.setitem(sys.modules, "pandas", None)
        # Make `pyarrow` importable again, now that `pandas` is hidden
        monkeypatch.delitem(sys.modules, "pyarrow")
        monkeypatch.setitem(sys.modules, "pyarrow", import_module("pyarrow"))
        from altair.datasets import load

        assert is_loader_backend(load, "pyarrow")
        monkeypatch.delattr(altair.datasets._loader, "load")
        monkeypatch.setitem(sys.modules, "pyarrow", None)

        with pytest.raises(AltairDatasetsError, match=r"no.+backend"):
            from altair.datasets import load


@backends
def test_load_call(backend: _Backend, monkeypatch: pytest.MonkeyPatch) -> None:
    """Test that the load function can be called with different backends."""
    import altair.datasets._loader

    monkeypatch.delattr(altair.datasets._loader, "load", raising=False)
    from altair.datasets import load

    assert is_loader_backend(load, "polars")
    default = load("cars")
    df = load("cars", backend=backend)
    default_2 = load("cars")

    # A per-call `backend` must not change what later default calls return
    assert nw_dep.is_polars_dataframe(default)
    assert is_frame_backend(df, backend)
    assert nw_dep.is_polars_dataframe(default_2)


@backends
def test_loader_call(backend: _Backend) -> None:
    """Ensure ``stocks`` is read from csv, or raises informatively for ``pyarrow``."""
    load = Loader.from_backend(backend)

    if backend == "pyarrow":
        # PyArrow has a known limitation with non-ISO date formats in CSV
        # The stocks dataset has dates like "Jan 1 2000" which PyArrow cannot parse
        # This should raise an informative AltairDatasetsError
        with pytest.raises(
            AltairDatasetsError, match="PyArrow cannot parse date format"
        ):
            load("stocks", ".csv")
    else:
        # Other backends should work normally
        frame = load("stocks", ".csv")
        assert nw_dep.is_into_dataframe(frame)
        nw_frame = nw.from_native(frame)
        assert set(nw_frame.columns) == {"symbol", "date", "price"}


# =============================================================================
# URL and Dataset Discovery Tests
# =============================================================================


def test_url_no_backend(monkeypatch: pytest.MonkeyPatch) -> None:
    """Exercise ``csv_cache.url`` directly, after ``infer_backend`` finds no backend."""
    from altair.datasets._cache import csv_cache
    from altair.datasets._reader import infer_backend

    priority: Any = (
        "nonexistent_mod_1",
        "nonexistent_mod_2",
        "nonexistent_mod_3",
        "nonexistent_mod_4",
    )
    assert csv_cache._mapping == {}
    with pytest.raises(AltairDatasetsError):
        infer_backend(priority=priority)

    url = csv_cache.url

    # Test that URLs are valid strings pointing to vega-datasets
    assert isinstance(url("jobs"), str)
    assert "vega-datasets" in url("jobs")

    assert csv_cache._mapping != {}

    # Test a few representative datasets instead of all 15+
    for dataset_name in ("cars", "flights_10k"):
        assert isinstance(url(dataset_name), str)
        assert "vega-datasets" in url(dataset_name)

    if find_spec("vegafusion"):
        assert isinstance(url("flights_3m"), str)
        assert "vega-datasets" in url("flights_3m")

    with monkeypatch.context() as mp:
        mp.setitem(sys.modules, "vegafusion", None)
        with pytest.raises(AltairDatasetsError, match=r".parquet.+require.+vegafusion"):
            url("flights_3m")
    with pytest.raises(
        TypeError, match="'nonexistent data' does not refer to a known dataset"
    ):
        url("nonexistent data")


#
# =============================================================================
# Error Handling and Edge Cases
# =============================================================================


@backends
def test_dataset_not_found(backend: _Backend) -> None:
    """Various queries that should **always raise** due to non-existent dataset."""
    load = Loader.from_backend(backend)
    real_name: Literal["disasters"] = "disasters"
    nonexistent_name: Literal["nonexistent name"] = "nonexistent name"
    unsupported_suffix: Literal["unsupported suffix"] = "unsupported suffix"
    incorrect_suffix: Literal[".json"] = ".json"
    ERR_NO_RESULT = ValueError
    MSG_NO_RESULT = "Found no results for"
    NAME = "dataset_name"
    SUFFIX = "suffix"

    with pytest.raises(
        ERR_NO_RESULT,
        match=re.compile(rf"{MSG_NO_RESULT}.+{NAME}.+{nonexistent_name}", re.DOTALL),
    ):
        load.url(nonexistent_name)
    with pytest.raises(
        TypeError,
        match=re.compile(
            rf"Expected '{SUFFIX}' to be one of.+\(.+\).+but got.+{unsupported_suffix}",
            re.DOTALL,
        ),
    ):
        load.url(real_name, unsupported_suffix)  # type: ignore[arg-type]
    with pytest.raises(
        ERR_NO_RESULT,
        match=re.compile(
            rf"{MSG_NO_RESULT}.+{NAME}.+{real_name}.+{SUFFIX}.+{incorrect_suffix}",
            re.DOTALL,
        ),
    ):
        load.url(real_name, incorrect_suffix)


def test_reader_missing_dependencies() -> None:
    """Ensure ``_import_guarded`` raises ``ModuleNotFoundError`` naming what is missing."""
    from altair.datasets._reader import _import_guarded

    nonexistent_name = "not_a_real_package"
    real_name = "altair"
    nonexistent_extra = "AnotherNonexistentPackage"
    backend = f"{real_name}[{nonexistent_extra}]"
    with pytest.raises(
        ModuleNotFoundError,
        match=re.compile(
            rf"{nonexistent_name}.+requires.+{nonexistent_name}.+but.+{nonexistent_name}.+not.+found.+pip install {nonexistent_name}",
            flags=re.DOTALL,
        ),
    ):
        _import_guarded(nonexistent_name)  # type: ignore
    with pytest.raises(
        ModuleNotFoundError,
        match=re.compile(
            rf"{re.escape(backend)}.+requires.+'{real_name}', '{nonexistent_extra}'.+but.+{nonexistent_extra}.+not.+found.+pip install {nonexistent_extra}",
            flags=re.DOTALL,
        ),
    ):
        _import_guarded(backend)  # type: ignore


def test_reader_missing_implementation() -> None:
    """Ensure a reader given only a csv implementation raises for other datasets."""
    from altair.datasets._constraints import is_csv
    from altair.datasets._reader import reader
    from altair.datasets._readimpl import read

    # Placeholder read function: it is only registered, never expected to run
    def func(*args, **kwds) -> pd.DataFrame:
        if TYPE_CHECKING:
            return pd.DataFrame()

    name = "pandas"
    rd = reader((read(func, is_csv),), name=name)
    with pytest.raises(
        AltairDatasetsError,
        match=re.compile(rf"Unable.+parquet.+native.+{name}", flags=re.DOTALL),
    ):
        rd.dataset("flights_3m")
    with pytest.raises(
        AltairDatasetsError,
        match=re.compile(r"Found no.+support.+flights.+json", flags=re.DOTALL),
    ):
        rd.dataset("flights_2k")
    with pytest.raises(
        AltairDatasetsError, match=re.compile(r"Image data is non-tabular")
    ):
        rd.dataset("icon_7zip")


# =============================================================================
# Caching Tests
# =============================================================================


@backends
def test_reader_cache(
    backend: _Backend, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """Ensure cache hits avoid network activity."""
    import polars as pl
    from polars.testing import assert_frame_equal

    monkeypatch.setenv(CACHE_ENV_VAR, str(tmp_path))
    load = Loader.from_backend(backend)
    assert load.cache.is_active()
    cache_dir = load.cache.path
    assert cache_dir == tmp_path
    assert tuple(load.cache) == ()

    # Use smaller datasets for faster testing
    lookup_groups = load("lookup_groups")
    load("lookup_people")
    load("iowa_electricity")
    load("global_temp")

    cached_paths = tuple(load.cache)
    assert len(cached_paths) == 4

    if nw_dep.is_polars_dataframe(lookup_groups):
        left, right = (
            lookup_groups,
            cast("pl.DataFrame", load("lookup_groups", ".csv")),
        )
    else:
        left, right = (
            pl.DataFrame(lookup_groups),
            pl.DataFrame(load("lookup_groups", ".csv")),
        )

    assert_frame_equal(left, right)
    assert len(tuple(load.cache)) == 4
    assert cached_paths == tuple(load.cache)

    # Repeated requests, spelled in different ways, must not add cache entries
    load("iowa_electricity", ".csv")
    load("global_temp", ".csv")
    load("global-temp.csv")

    assert len(tuple(load.cache)) == 4
    assert cached_paths == tuple(load.cache)

    load("lookup_people")
    load("lookup_people.csv")
    load("lookup_people", ".csv")
    load("lookup_people")

    assert len(tuple(load.cache)) == 4
    assert cached_paths == tuple(load.cache)


@datasets_debug
@backends
def test_reader_cache_exhaustive(
    backend: _Backend,
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    polars_loader: PolarsLoader,
) -> None:
    """
    Fully populate and then purge the cache for all backends.

    Notes
    -----
    - Does not attempt to read the files
    - Checking we can support pre-downloading and safely deleting
    - Requests work the same for all backends
        - The logic for detecting the cache contents uses ``narwhals``
        - Here, we're testing that these ``narwhals`` operations are consistent
    - `DatasetCache.download_all` is expensive for CI, so aiming for it to run **at most once**
        - 34-45s per call (4x backends)
    """
    polars_loader.cache.download_all()
    CLONED: Path = tmp_path / "clone"
    CLONED.mkdir(exist_ok=True)

    # Copy the cache contents
    import shutil

    shutil.copytree(polars_loader.cache.path, CLONED, dirs_exist_ok=True)

    monkeypatch.setenv(CACHE_ENV_VAR, str(tmp_path))
    load = Loader.from_backend(backend)
    assert load.cache.is_active()
    cache_dir = load.cache.path
    assert cache_dir == tmp_path
    assert tuple(load.cache) == (CLONED,)
    load.cache.path = CLONED
    cached_paths = tuple(load.cache)
    assert cached_paths != ()

    # NOTE: Approximating all datasets downloaded (minimum expected count)
    assert len(cached_paths) >= 70
    assert all(bool(fp.exists() and fp.stat().st_size) for fp in load.cache)
    # NOTE: Confirm this is a no-op (already downloaded)
    load.cache.download_all()
    assert len(cached_paths) == len(tuple(load.cache))

    # NOTE: Ensure unrelated files in the directory are not removed during cache clearing
    test_file: Path = tmp_path / "test_file.json"
    test_file.touch(exist_ok=False)
    load.cache.clear()

    remaining = tuple(tmp_path.iterdir())
    assert set(remaining) == {test_file, CLONED}
    test_file.unlink()  # Remove the test file
    shutil.rmtree(CLONED)  # Remove the cloned directory


@no_xdist
def test_reader_cache_disable(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Ensure the cache can be disabled with ``cache.path = None`` and re-enabled."""
    from altair.datasets import load

    monkeypatch.setenv(CACHE_ENV_VAR, str(tmp_path))
    assert load.cache.is_active()
    assert load.cache.path == tmp_path
    assert load.cache.is_empty()
    load("cars")
    assert not load.cache.is_empty()
    # ISSUE: https://github.com/python/mypy/issues/3004
    load.cache.path = None
    assert load.cache.is_not_active()
    with pytest.raises(
        ValueError,
        match=re.compile(
            rf"Cache.+unset.+{CACHE_ENV_VAR}.+\.cache\.path =", flags=re.DOTALL
        ),
    ):
        tuple(load.cache)
    load.cache.path = tmp_path
    assert load.cache.is_active()
    assert load.cache.path == tmp_path
    assert not load.cache.is_empty()


# =============================================================================
# Format-Specific Tests
# =============================================================================


@pytest.mark.parametrize(
    "name", ["cars", "movies", "wheat", "barley", "gapminder", "income", "burtin"]
)
@pytest.mark.parametrize("fallback", ["polars", None])
@backends_pyarrow
def test_pyarrow_read_json(
    backend: _PyArrow,
    fallback: _Polars | None,
    name: Dataset,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Ensure ``pyarrow`` reads json datasets, with or without ``polars`` importable."""
    if fallback is None:
        monkeypatch.setitem(sys.modules, "polars", None)
    load = Loader.from_backend(backend)
    assert load(name, ".json")


@datasets_spatial
@backends_no_polars
@pytest.mark.geospatial
@skip_requires_geopandas
def test_spatial(backend: _Backend, name: Dataset) -> None:
    """Ensure spatial datasets load where the loader supports them, and raise otherwise."""
    load = Loader.from_backend(backend)

    # Specify layer parameter for datasets with multiple layers to avoid warnings
    layer_kwargs = {}
    if name == "us_10m":
        layer_kwargs = {"layer": "counties"}
    elif name == "world_110m":
        layer_kwargs = {"layer": "countries"}

    if is_polars_backed_pyarrow(load):
        assert nw_dep.is_pyarrow_table(load(name, **layer_kwargs))
    elif is_geopandas_backed_pandas(load):
        import geopandas

        assert isinstance(load(name, **layer_kwargs), geopandas.GeoDataFrame)
    else:
        pattern = re.compile(
            rf"{name}.+geospatial.+native.+{re.escape(backend)}.+try.+polars.+url",
            flags=re.DOTALL | re.IGNORECASE,
        )
        with pytest.raises(AltairDatasetsError, match=pattern):
            load(name, **layer_kwargs)


@backends
def test_tsv(backend: _Backend) -> None:
    """Ensure a ``.tsv`` dataset is read into the frame type of ``backend``."""
    load = Loader.from_backend(backend)
    # The result of the check must be asserted; a bare call can never fail the test
    assert is_frame_backend(load("unemployment", ".tsv"), backend)


# =============================================================================
# Comprehensive Dataset Tests
# =============================================================================


@datasets_all
@datasets_debug
def test_all_datasets(polars_loader: PolarsLoader, name: Dataset) -> None:
    """
    Test that all datasets can be loaded with the polars backend.

    - For image files (e.g., icon_7zip, ffox, gimp), we expect an error because these are not tabular data.
      The error message should be clear and helpful, and this is the correct behavior.
    - Dataset names are valid Python identifiers, but the URLs may differ; we do not test URL construction here.
    - This test checks Altair's integration with the datasets API, not the validity of upstream datasets or backends.
    """
    if name in {"icon_7zip", "ffox", "gimp"}:
        # These are image files that should raise an error when loaded as tabular data
        # The error message contains the actual filename (e.g., '7zip.png', 'ffox.png', 'gimp.png')
        pattern = re.compile(
            r"Unable to load '.+\.png' as tabular data",
            flags=re.DOTALL | re.IGNORECASE,
        )
        with pytest.raises(AltairDatasetsError, match=pattern):
            polars_loader(name)
    else:
        frame = polars_loader(name)
        assert nw_dep.is_polars_dataframe(frame)


# =============================================================================
# Network and Connection Tests
# =============================================================================


def _raise_exception(e: type[Exception], *args: Any, **kwds: Any):
    """Raise an instance of ``e``, constructed from ``*args`` and ``**kwds``."""
    raise e(*args, **kwds)


def test_no_remote_connection(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Ensure cached datasets load while the opener raises ``URLError``."""
    from polars.testing import assert_frame_equal

    load = Loader.from_backend("polars")
    load.cache.path = tmp_path
    load("london_centroids")
    load("stocks")
    load("driving")
    cached_paths = tuple(tmp_path.iterdir())
    assert len(cached_paths) == 3
    raiser = partial(_raise_exception, URLError)
    with monkeypatch.context() as mp:
        mp.setattr(load._reader._opener, "open", raiser)
        # Existing cache entries don't trigger an error
        load("london_centroids")
        load("stocks")
        load("driving")
        # Mocking cache-miss without remote conn
        with pytest.raises(URLError):
            load("birdstrikes")
        assert len(tuple(tmp_path.iterdir())) == 3

    # Now we can get a cache-hit
    frame = load("birdstrikes")
    assert nw_dep.is_polars_dataframe(frame)
    assert len(tuple(tmp_path.iterdir())) == 4

    with monkeypatch.context() as mp:
        mp.setattr(load._reader._opener, "open", raiser)
        # Here, the remote conn isn't considered - we already have the file
        frame_from_cache = load("birdstrikes")
        assert len(tuple(tmp_path.iterdir())) == 4

    assert_frame_equal(frame, frame_from_cache)


# =============================================================================
# Data Type and Schema Tests
#
============================================================================= @pytest.mark.parametrize( ("name", "column"), [ ("cars", "Year"), ("unemployment_across_industries", "date"), ("flights_10k", "date"), ("football", "date"), ("crimea", "date"), ("ohlc", "date"), ], ) def test_polars_date_read_json_roundtrip( polars_loader: PolarsLoader, name: Dataset, column: str ) -> None: """Ensure ``date`` columns are inferred using the roundtrip json -> csv method.""" frame = polars_loader(name, ".json") tp = frame.schema.to_python()[column] assert tp is dt.date or issubclass(tp, dt.date) @backends_pandas_any @pytest.mark.parametrize( ("name", "columns"), [ ("birdstrikes", "Flight Date"), ("cars", "Year"), ("co2_concentration", "Date"), ("crimea", "date"), ("football", "date"), ("iowa_electricity", "year"), ("la_riots", "death_date"), ("ohlc", "date"), ("seattle_weather_hourly_normals", "date"), ("seattle_weather", "date"), ("sp500_2000", "date"), ("unemployment_across_industries", "date"), ("us_employment", "month"), ], ) def test_pandas_date_parse( backend: _PandasAny, name: Dataset, columns: OneOrSeq[str], polars_loader: PolarsLoader, ) -> None: """ Ensure schema defaults are correctly parsed. Notes ----- - Depends on ``frictionless`` being able to detect the date/datetime columns. 
- Not all format strings work """ date_columns: list[str] = [columns] if isinstance(columns, str) else list(columns) load = Loader.from_backend(backend) url = load.url(name) kwds: dict[str, Any] = ( {"convert_dates": date_columns} if url.endswith(".json") else {"parse_dates": date_columns} ) df_schema_derived: pd.DataFrame = load(name) nw_schema = nw.from_native(df_schema_derived).schema df_manually_specified: pd.DataFrame = load(name, **kwds) assert set(date_columns).issubset(nw_schema) for column in date_columns: assert nw_schema[column] in {nw.Date, nw.Datetime} assert nw_schema == nw.from_native(df_manually_specified).schema # We do not assert that loading with parse_dates=[]/convert_dates=[] yields a # different schema: backends may still infer date columns from the file. # NOTE: Checking `polars` infers the same[1] as what `pandas` needs a hint for # [1] Doesn't need to be exact, just recognize as *some kind* of date/datetime pl_schema: pl.Schema = polars_loader(name).schema for column in date_columns: assert pl_schema[column].is_temporal() # ============================================================================= # Data API Tests # ============================================================================= class TestDataObject: """Test the main DataObject functionality.""" def test_list_datasets(self) -> None: """Test that list_datasets returns a list of available datasets.""" from altair.datasets import data datasets = data.list_datasets() assert isinstance(datasets, list) assert len(datasets) > 0 # Check that common datasets are present common_datasets = ["cars", "movies", "stocks", "penguins"] for dataset in common_datasets: if dataset in datasets: break else: pytest.fail("No common datasets found in list_datasets") def test_get_default_engine(self) -> None: """Test getting the default engine.""" from altair.datasets import data default_engine = data.get_default_engine() assert default_engine in {"pandas", "polars", "pandas[pyarrow]", "pyarrow"} 
def test_set_default_engine(self) -> None: """Test setting the default engine.""" from altair.datasets import data original_engine = data.get_default_engine() data.set_default_engine("polars") assert data.get_default_engine() == "polars" data.set_default_engine("pandas") assert data.get_default_engine() == "pandas" data.set_default_engine(original_engine) def test_nonexistent_dataset_attribute(self): from altair.datasets import data with pytest.raises( AttributeError, match="Dataset 'nonexistent_dataset' not found" ): # NOTE: Needing a type ignore here is a good thing _ = data.nonexistent_dataset # pyright: ignore[reportArgumentType] class TestDataAPIIntegration: """Test integration scenarios with the data API.""" def test_data_consistency(self) -> None: """Test that data loaded through different methods is consistent.""" from altair.datasets import data # Load through data API cars_data_api = data.cars() # Load through direct loader from altair.datasets import Loader loader = Loader.from_backend("pandas") cars_loader = loader("cars") # Both should have the same number of rows assert len(cars_data_api) == len(cars_loader) def test_unsupported_engine(): """Test that unsupported engine raises appropriate error.""" from altair.datasets import data with pytest.raises(TypeError, match="Unknown backend"): # NOTE: Needing a type ignore here is a good thing data.cars(engine="unsupported_engine") # pyright: ignore[reportArgumentType, reportCallIssue] ================================================ FILE: tests/test_examples.py ================================================ """ Note that this module dominates the testing time. TODO ---- - Research how this could be done with fixtures. 
Other optimization ideas ------------------ Cache the calls to `compile` in `altair.utils.execeval` - The each file has every expression compiled 3x times - Would immediately reduce to 1x - Possible there are overlapping expressions between `examples_arguments_syntax` and `examples_methods_syntax` - Could lead to further performance gains - All of the tests only call `eval_block` to operate on the finished chart - The need to execute the code is not what is being tested """ from __future__ import annotations import io from typing import Any import altair as alt from altair.utils.execeval import eval_block from tests import ( distributed_examples, ignore_DataFrameGroupBy, skip_requires_vl_convert, slow, ) @ignore_DataFrameGroupBy @distributed_examples def test_render_examples_to_chart(source: Any, filename: str) -> None: chart = eval_block(source) if chart is None: msg = f"Example file {filename} should define chart in its final statement." raise ValueError(msg) try: assert isinstance(chart.to_dict(), dict) except Exception as err: msg = ( f"Example file {filename} raised an exception when " f"converting to a dict: {err}" ) raise AssertionError(msg) from err @ignore_DataFrameGroupBy @distributed_examples def test_from_and_to_json_roundtrip(source: Any, filename: str) -> None: """ Tests if the to_json and from_json work for all examples in the Example Gallery. (and by extension to_dict and from_dict) """ chart = eval_block(source) if chart is None: msg = f"Example file {filename} should define chart in its final statement." raise ValueError(msg) try: first_json = chart.to_json() reconstructed_chart = alt.Chart.from_json(first_json) # As the chart objects are not # necessarily the same - they could use different objects to encode the same # information - we do not test for equality of the chart objects, but rather # for equality of the json strings. 
second_json = reconstructed_chart.to_json() assert first_json == second_json except Exception as err: msg = ( f"Example file {filename} raised an exception when " f"doing a json conversion roundtrip: {err}" ) raise AssertionError(msg) from err @slow @ignore_DataFrameGroupBy @distributed_examples @skip_requires_vl_convert def test_render_examples_to_png(source: Any, filename: str) -> None: chart = eval_block(source) if chart is None: msg = f"Example file {filename} should define chart in its final statement." raise ValueError(msg) out = io.BytesIO() chart.save(out, format="png", engine="vl-convert") buf = out.getbuffer() prefix = buf[:4].tobytes() assert prefix == b"\x89PNG" ================================================ FILE: tests/test_jupyter_chart.py ================================================ from importlib.metadata import version as importlib_version import pandas as pd import pytest from packaging.version import Version import altair as alt from altair.datasets import data # If anywidget is not installed, we will skip the tests in this file. 
try: import anywidget # noqa: F401 has_anywidget = True except ImportError: has_anywidget = False if has_anywidget: from altair.jupyter import jupyter_chart else: jupyter_chart = None # type: ignore skip_requires_anywidget = pytest.mark.skipif( not has_anywidget, reason="anywidget not importable" ) try: import vegafusion # noqa: F401 transformers = ["default", "vegafusion"] except ImportError: transformers = ["default"] param_transformers = pytest.mark.parametrize("transformer", transformers) if Version(importlib_version("ipywidgets")) < Version("8.1.4"): # See https://github.com/vega/altair/issues/3234#issuecomment-2268515312 _filterwarn = pytest.mark.filterwarnings( "ignore:Deprecated in traitlets 4.1.*:DeprecationWarning" ) jupyter_marks: pytest.MarkDecorator = skip_requires_anywidget( _filterwarn(param_transformers) ) else: jupyter_marks = skip_requires_anywidget(param_transformers) @jupyter_marks def test_chart_with_no_interactivity(transformer): with alt.data_transformers.enable(transformer): source = pd.DataFrame( { "a": ["A", "B", "C", "D", "E", "F", "G", "H", "I"], "b": [28, 55, 43, 91, 81, 53, 19, 87, 52], } ) chart = alt.Chart(source).mark_bar().encode(x="a", y="b") widget = alt.JupyterChart(chart) if transformer == "vegafusion": # With the "vegafusion" transformer, the spec is not computed until the front-end # sets the local_tz. Assign this property manually to simulate this. 
widget.local_tz = "UTC" assert widget.spec == chart.to_dict(format="vega") else: assert widget.spec == chart.to_dict() # There should be no params or selections initialized assert len(widget.selections.trait_values()) == 0 assert len(widget.params.trait_values()) == 0 @jupyter_marks def test_interval_selection_example(transformer): with alt.data_transformers.enable(transformer): source = data.cars() brush = alt.selection_interval(name="interval") chart = ( alt.Chart(source) .mark_point() .encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", color=alt.condition(brush, "Cylinders:O", alt.value("grey")), ) .add_params(brush) ) widget = alt.JupyterChart(chart) if transformer == "vegafusion": widget.local_tz = "UTC" assert widget.spec == chart.to_dict(format="vega") else: assert widget.spec == chart.to_dict() # There should be one selection and zero params assert len(widget.selections.trait_values()) == 1 assert len(widget.params.trait_values()) == 0 # Check initial interval selection selection = widget.selections.interval assert isinstance(selection, jupyter_chart.IntervalSelection) assert selection.value == {} assert selection.store == [] # Simulate Vega signal update store = [ { "unit": "", "fields": [ {"field": "Horsepower", "channel": "x", "type": "R"}, {"field": "Miles_per_Gallon", "channel": "y", "type": "R"}, ], "values": [ [40.0, 100], [25, 30], ], } ] widget._vl_selections = { "interval": { "value": { "Horsepower": [40.0, 100], "Miles_per_Gallon": [25, 30], }, "store": store, } } selection = widget.selections.interval assert isinstance(selection, jupyter_chart.IntervalSelection) assert selection.value == { "Horsepower": [40.0, 100], "Miles_per_Gallon": [25, 30], } assert selection.store == store @jupyter_marks def test_index_selection_example(transformer): with alt.data_transformers.enable(transformer): source = data.cars() brush = alt.selection_point(name="index") chart = ( alt.Chart(source) .mark_point() .encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", 
color=alt.condition(brush, "Cylinders:O", alt.value("grey")), ) .add_params(brush) ) widget = alt.JupyterChart(chart) if transformer == "vegafusion": widget.local_tz = "UTC" assert widget.spec == chart.to_dict(format="vega") else: assert widget.spec == chart.to_dict() # There should be one selection and zero params assert len(widget.selections.trait_values()) == 1 assert len(widget.params.trait_values()) == 0 # Check initial interval selection selection = widget.selections.index assert isinstance(selection, jupyter_chart.IndexSelection) assert selection.value == [] assert selection.store == [] # Simulate Vega signal update store = [ {"unit": "", "_vgsid_": 220}, {"unit": "", "_vgsid_": 330}, {"unit": "", "_vgsid_": 341}, ] widget._vl_selections = { "index": { "value": { "_vgsid_": "Set(220,330,341)", "vlPoint": { "or": [{"_vgsid_": 220}, {"_vgsid_": 330}, {"_vgsid_": 341}] }, }, "store": store, } } selection = widget.selections.index assert isinstance(selection, jupyter_chart.IndexSelection) assert selection.value == [219, 329, 340] assert selection.store == store @jupyter_marks def test_point_selection(transformer): with alt.data_transformers.enable(transformer): source = data.cars() brush = alt.selection_point(name="point", encodings=["color"], bind="legend") chart = ( alt.Chart(source) .mark_point() .encode( x="Horsepower:Q", y="Miles_per_Gallon:Q", color=alt.condition(brush, "Cylinders:O", alt.value("grey")), ) .add_params(brush) ) widget = alt.JupyterChart(chart) if transformer == "vegafusion": widget.local_tz = "UTC" assert widget.spec == chart.to_dict(format="vega") else: assert widget.spec == chart.to_dict() # There should be one selection and zero params assert len(widget.selections.trait_values()) == 1 assert len(widget.params.trait_values()) == 0 # Check initial interval selection selection = widget.selections.point assert isinstance(selection, jupyter_chart.PointSelection) assert selection.value == [] assert selection.store == [] # Simulate Vega signal 
update store = [ { "fields": [{"field": "Cylinders", "channel": "color", "type": "E"}], "values": [4], }, { "fields": [{"field": "Cylinders", "channel": "color", "type": "E"}], "values": [5], }, ] widget._vl_selections = { "point": { "value": { "Cylinders": [4, 5], "vlPoint": {"or": [{"Cylinders": 4}, {"Cylinders": 5}]}, }, "store": store, } } selection = widget.selections.point assert isinstance(selection, jupyter_chart.PointSelection) assert selection.value == [{"Cylinders": 4}, {"Cylinders": 5}] assert selection.store == store @jupyter_marks def test_param_updates(transformer): with alt.data_transformers.enable(transformer): source = data.cars() size_param = alt.param( name="size", value=10, bind=alt.binding_range(min=1, max=100) ) chart = ( alt.Chart(source) .mark_point() .encode(x="Horsepower:Q", y="Miles_per_Gallon:Q", size=size_param) .add_params(size_param) ) widget = alt.JupyterChart(chart) # There should be one param and zero selections assert len(widget.selections.trait_values()) == 0 assert len(widget.params.trait_values()) == 1 # Initial value should match what was provided assert widget.params.size == 10 # Update param from python widget.params.size = 50 assert widget.params.size == 50 ================================================ FILE: tests/test_magics.py ================================================ from __future__ import annotations import json from typing import TYPE_CHECKING, Any import pytest from altair.vegalite.v6.display import VegaLite from tests import skip_requires_ipython if TYPE_CHECKING: from IPython.core.interactiveshell import InteractiveShell @pytest.fixture def records() -> list[dict[str, Any]]: return [ {"amount": 28, "category": "A"}, {"amount": 55, "category": "B"}, {"amount": 43, "category": "C"}, {"amount": 91, "category": "D"}, {"amount": 81, "category": "E"}, {"amount": 53, "category": "F"}, {"amount": 19, "category": "G"}, {"amount": 87, "category": "H"}, ] @pytest.fixture def vl_spec(records) -> dict[str, Any]: 
@skip_requires_ipython
def test_vegalite_magic_pandas_data(ipshell, vl_spec) -> None:
    """
    Check that ``%%vegalite table`` injects the shell's ``table`` DataFrame.

    The cell body is the reference spec with its ``"data"`` entry removed; the
    rendered spec must nevertheless equal the full reference spec.
    """
    spec_without_data = dict(vl_spec)
    spec_without_data.pop("data", None)
    cell = "%%vegalite table\n" + json.dumps(spec_without_data)
    rendered = ipshell.run_cell(cell).result
    assert isinstance(rendered, VegaLite)
    assert rendered.spec == vl_spec
# In that case, run `hatch run update-init-file` to update __all__ assert alt.__all__ == relevant_attributes ================================================ FILE: tests/test_transformed_data.py ================================================ import pkgutil import sys from importlib.metadata import version from importlib.util import find_spec import narwhals.stable.v1 as nw import pytest from packaging.version import Version import altair as alt from altair.datasets import data from altair.utils.execeval import eval_block from tests import ( examples_methods_syntax, ignore_DataFrameGroupBy, skip_requires_vegafusion, slow, ) XDIST_ENABLED: bool = "xdist" in sys.modules """Use as an `xfail` condition, if running in parallel may cause the test to fail.""" xfail_vegafusion_2: pytest.MarkDecorator = pytest.mark.xfail( bool(find_spec("vegafusion")) and Version(version("vegafusion")) >= Version("2.0.0a0"), raises=ValueError, reason="https://github.com/vega/altair/issues/3701", ) # fmt: off @ignore_DataFrameGroupBy @skip_requires_vegafusion @pytest.mark.parametrize(("filename", "rows", "cols"), [ ("annual_weather_heatmap.py", 366, ["monthdate_date_end", "max_temp_max"]), ("anscombe_plot.py", 44, ["Series", "X", "Y"]), ("bar_chart_sorted.py", 6, ["site", "sum_yield"]), ("bar_chart_faceted_compact.py", 27, ["p", "p_end"]), ("beckers_barley_facet.py", 120, ["year", "site"]), ("beckers_barley_wrapped_facet.py", 120, ["site", "median_yield"]), ("bump_chart.py", 96, ["rank", "yearmonth_date"]), ("comet_chart.py", 120, ["variety", "delta"]), ("diverging_stacked_bar_chart.py", 40, ["value", "percentage_start"]), ("donut_chart.py", 6, ["value_start", "value_end"]), ("gapminder_bubble_plot.py", 187, ["income", "population"]), ("grouped_bar_chart2.py", 9, ["Group", "Value_start"]), ("hexbins.py", 84, ["xFeaturePos", "mean_temp_max"]), pytest.param("histogram_heatmap.py", 378, ["bin_maxbins_40_Rotten Tomatoes Rating", "__count"], marks=slow), ("histogram_scatterplot.py", 64, 
["bin_maxbins_10_Rotten Tomatoes Rating", "__count"]), pytest.param("interactive_legend.py", 1708, ["sum_count_start", "series"], marks=slow), ("iowa_electricity.py", 51, ["net_generation_start", "year"]), ("isotype.py", 37, ["animal", "x"]), ("isotype_grid.py", 100, ["row", "col"]), ("lasagna_plot.py", 492, ["yearmonthdate_date", "sum_price"]), ("layered_area_chart.py", 51, ["source", "net_generation"]), ("layered_bar_chart.py", 51, ["source", "net_generation"]), ("layered_histogram.py", 113, ["bin_maxbins_100_Measurement"]), ("line_chart_with_cumsum.py", 52, ["cumulative_wheat"]), ("line_custom_order.py", 55, ["miles", "gas"]), pytest.param("line_percent.py", 30, ["sex", "perc"], marks=slow), ("line_with_log_scale.py", 15, ["year", "sum_people"]), ("multifeature_scatter_plot.py", 342, ["Beak Depth (mm)", "Species"]), pytest.param("natural_disasters.py", 686, ["Deaths", "Year"], marks=xfail_vegafusion_2), ("normalized_stacked_area_chart.py", 51, ["source", "net_generation_start"]), ("normalized_stacked_bar_chart.py", 60, ["site", "sum_yield_start"]), ("parallel_coordinates.py", 1032, ["key", "value"]), ("percentage_of_total.py", 5, ["PercentOfTotal", "TotalTime"]), ("pie_chart.py", 6, ["category", "value_start"]), ("pyramid.py", 3, ["category", "value_start"]), ("stacked_bar_chart_sorted_segments.py", 60, ["variety", "site"]), ("stem_and_leaf.py", 100, ["stem", "leaf"]), pytest.param("streamgraph.py", 1708, ["series", "sum_count"], marks=slow), ("top_k_items.py", 10, ["rank", "IMDB Rating_start"]), ("top_k_letters.py", 9, ["rank", "letters"]), pytest.param("top_k_with_others.py", 10, ["ranked_director", "mean_aggregate_gross"], marks=slow), ("area_faceted.py", 492, ["date", "price"]), ("distributions_faceted_histogram.py", 20, ["Origin", "__count"]), ("us_population_over_time.py", 38, ["sex", "people_start"]), ("us_population_over_time_facet.py", 285, ["year", "sum_people"]), ("wilkinson-dot-plot.py", 21, ["data", "id"]), ("window_rank.py", 12, ["team", "diff"]), 
]) @pytest.mark.parametrize("to_reconstruct", [True, False]) def test_primitive_chart_examples(filename, rows, cols, to_reconstruct): # fmt: on source = pkgutil.get_data(examples_methods_syntax.__name__, filename) chart = eval_block(source, strict=True) if to_reconstruct: # When reconstructing a Chart, Altair uses different classes # then what might have been originally used. See # https://github.com/hex-inc/vegafusion/issues/354 for more info. chart = alt.Chart.from_dict(chart.to_dict()) df = chart.transformed_data() assert df is not None nw_df = nw.from_native(df, eager_only=True) assert len(nw_df) == rows assert set(cols).issubset(set(nw_df.columns)) # fmt: off @skip_requires_vegafusion @pytest.mark.parametrize(("filename", "all_rows", "all_cols"), [ ("errorbars_with_std.py", [10, 10], [["upper_yield"], ["extent_yield"]]), ("candlestick_chart.py", [44, 44], [["low"], ["close"]]), ("co2_concentration.py", [741, 8, 8], [["first_date"], ["scaled_date"], ["end"]]), pytest.param("falkensee.py", [2, 38, 38], [["event"], ["population"], ["population"]], marks=xfail_vegafusion_2), ("heat_lane.py", [10, 10], [["bin_count_start"], ["y2"]]), ("histogram_responsive.py", [23, 23], [["__count"], ["__count"]]), ("histogram_with_a_global_mean_overlay.py", [9, 1], [["__count"], ["mean_IMDB Rating"]]), ("horizon_graph.py", [20, 20], [["x"], ["ny"]]), pytest.param("interactive_cross_highlight.py", [64, 64, 13], [["__count"], ["__count"], ["Major Genre"]], marks=slow), ("interval_selection.py", [123, 123], [["price_start"], ["date"]]), ("layered_chart_with_dual_axis.py", [12, 12], [["month_date"], ["average_precipitation"]]), ("layered_heatmap_text.py", [9, 9], [["Cylinders"], ["mean_horsepower"]]), ("multiline_highlight.py", [560, 560], [["price"], ["date"]]), ("multiline_tooltip.py", [300, 300, 300, 0, 300], [["x"], ["y"], ["y"], ["x"], ["x"]]), ("pie_chart_with_labels.py", [6, 6], [["category"], ["value"]]), ("radial_chart.py", [6, 6], [["values"], ["values_start"]]), 
("scatter_linked_table.py", [392, 14, 14, 14], [["Year"], ["Year"], ["Year"], ["Year"]]), ("scatter_marginal_hist.py", [24, 342, 33], [["__count"], ["Species"], ["__count"]]), pytest.param( "scatter_with_layered_histogram.py", [2, 19], [["gender"], ["__count"]], marks=(slow, pytest.mark.xfail( XDIST_ENABLED, reason="Possibly `numpy` conflict with `xdist`.\n" "Very intermittent, but only affects `to_reconstruct=False`." )), ), ("scatter_with_minimap.py", [1461, 1461], [["date"], ["date"]]), ("scatter_with_rolling_mean.py", [1461, 1461], [["date"], ["rolling_mean"]]), ("seattle_weather_interactive.py", [1461, 5], [["date"], ["__count"]]), ("select_detail.py", [20, 1000], [["id"], ["x"]]), ("simple_scatter_with_errorbars.py", [5, 5], [["x"], ["upper_ymin"]]), ("stacked_bar_chart_with_text.py", [60, 60], [["site"], ["site"]]), ("us_employment.py", [120, 1, 2], [["month"], ["president"], ["president"]]), ("us_population_pyramid_over_time.py", [19, 38, 19], [["gender"], ["year"], ["gender"]]), ]) @pytest.mark.parametrize("to_reconstruct", [True, False]) def test_compound_chart_examples(filename, all_rows, all_cols, to_reconstruct): # fmt: on source = pkgutil.get_data(examples_methods_syntax.__name__, filename) chart = eval_block(source, strict=True) if to_reconstruct: # When reconstructing a Chart, Altair uses different classes # then what might have been originally used. See # https://github.com/hex-inc/vegafusion/issues/354 for more info. chart = alt.Chart.from_dict(chart.to_dict()) assert isinstance(chart, (alt.LayerChart, alt.ConcatChart, alt.HConcatChart, alt.VConcatChart)) dfs = chart.transformed_data() if not to_reconstruct: # Only run assert statements if the chart is not reconstructed. Reason # is that for some charts, the original chart contained duplicated datasets # which disappear when reconstructing the chart. 
def assert_is_vega_spec(vega_spec):
    """
    Assert that ``vega_spec`` looks like a compiled Vega (v6) specification.

    Checks the ``$schema`` URL and the presence of the ``data``, ``marks``,
    ``scales`` and ``axes`` top-level keys, in that order.
    """
    expected_schema = "https://vega.github.io/schema/vega/v6.json"
    assert vega_spec["$schema"] == expected_schema
    for section in ("data", "marks", "scales", "axes"):
        assert section in vega_spec
@pytest.fixture(params=[False, True])
def pd_data(request) -> pd.DataFrame:
    """
    Provide a small pandas frame, once with native dtypes and once as ``object``.

    Columns: integers ``x``, strings ``y``, naive daily timestamps ``z`` and
    UTC-localized daily timestamps ``t``. When the fixture parameter is true,
    every column is cast to ``object`` dtype.
    """
    columns = {
        "x": list(range(1, 6)),
        "y": list("ABCDE"),
        "z": pd.date_range("2018-01-01", periods=5, freq="D"),
        "t": pd.date_range("2018-01-01", periods=5, freq="D").tz_localize("UTC"),
    }
    frame = pd.DataFrame(columns)
    return frame.astype("object") if request.param else frame
"min(foobar):temporal", dict(type="temporal", field="foobar", aggregate="min"), ), ("sum(foobar):Q", dict(type="quantitative", field="foobar", aggregate="sum")), # check that invalid arguments are not split-out ("invalid(blah)", dict(field="invalid(blah)")), (r"blah\:invalid", dict(field=r"blah\:invalid")), (r"invalid(blah)\:invalid", dict(field=r"invalid(blah)\:invalid")), # check parsing in presence of strange characters ( r"average(a b\:(c\nd):Q", dict(aggregate="average", field=r"a b\:(c\nd", type="quantitative"), ), # special case: count doesn't need an argument ("count()", dict(aggregate="count", type="quantitative")), ("count():O", dict(aggregate="count", type="ordinal")), # time units: ("month(x)", dict(field="x", timeUnit="month", type="temporal")), ("year(foo):O", dict(field="foo", timeUnit="year", type="ordinal")), ( "date(date):quantitative", dict(field="date", timeUnit="date", type="quantitative"), ), ( "yearmonthdate(field)", dict(field="field", timeUnit="yearmonthdate", type="temporal"), ), ], ) def test_parse_shorthand(shorthand: str, expected: dict[str, Any]) -> None: assert parse_shorthand(shorthand) == expected @pytest.mark.parametrize( ("shorthand", "expected"), [ ("x", dict(field="x", type="quantitative")), ("y", dict(field="y", type="nominal")), ("z", dict(field="z", type="temporal")), ("t", dict(field="t", type="temporal")), ("count(x)", dict(field="x", aggregate="count", type="quantitative")), ("count()", dict(aggregate="count", type="quantitative")), ("month(z)", dict(timeUnit="month", field="z", type="temporal")), ("month(t)", dict(timeUnit="month", field="t", type="temporal")), ], ) def test_parse_shorthand_with_data( pd_data, shorthand: str, expected: dict[str, Any] ) -> None: assert parse_shorthand(shorthand, pd_data) == expected @pytest.mark.skipif(Version("1.0.0") > PANDAS_VERSION, reason="dtype unavailable") def test_parse_shorthand_with_data_pandas_v1(pd_data) -> None: pd_data["b"] = pd.Series([True, False, True, False, None], 
@skip_requires_pyarrow
def test_parse_shorthand_for_arrow_timestamp():
    """
    Check type inference for pyarrow-backed timestamp columns.

    Both the timezone-naive column ``"z"`` and the UTC-localized column ``"t"``
    must be inferred as ``"temporal"`` after the frame is converted to
    ``pd.ArrowDtype`` columns.
    """
    import pyarrow as pa

    data = pd.DataFrame(
        {
            "z": pd.date_range("2018-01-01", periods=5, freq="D"),
            "t": pd.date_range("2018-01-01", periods=5, freq="D").tz_localize("UTC"),
        }
    )
    # Convert to arrow-packed dtypes
    data = pa.Table.from_pandas(data).to_pandas(types_mapper=pd.ArrowDtype)
    assert parse_shorthand("z", data) == {"field": "z", "type": "temporal"}
    # The timezone-aware column needs its own check; asserting on "z" twice
    # would leave it untested.
    assert parse_shorthand("t", data) == {"field": "t", "type": "temporal"}
def _getargs(*args, **kwargs):
    """Return the received positional arguments (tuple) and keyword arguments (dict)."""
    positional, keyword = args, kwargs
    return positional, keyword
def _pipe(data: Any, *funcs: Callable[..., Any]) -> Any:
    """
    Thread ``data`` through ``funcs`` from left to right and return the result.

    Kept locally so the existing tests keep working; this helper was
    originally provided by the ``toolz`` dependency.
    """
    if not funcs:
        return data
    first, *rest = funcs
    return _pipe(first(data), *rest)
def test_dataframe_to_json():
    """
    Test to_json.

    - make certain the filename is deterministic
    - make certain the file contents match the data.
    """
    # Every path reported by ``to_json`` is recorded as soon as it is known, so
    # cleanup also happens when a later step raises or the two names differ.
    written = set()
    data = _create_dataframe(10)
    try:
        result1 = _pipe(data, to_json)
        written.add(result1["url"])
        result2 = _pipe(data, to_json)
        written.add(result2["url"])
        output = pd.read_json(result1["url"])
    finally:
        for name in written:
            # ``missing_ok`` keeps a cleanup problem from masking the real error.
            Path(name).unlink(missing_ok=True)
    assert result1 == result2
    assert output.equals(data)
@pytest.mark.parametrize("tp", [pd.DataFrame, pl.DataFrame], ids=["pandas", "polars"])
def test_dataframe_to_csv(tp: type[Any]) -> None:
    """
    Test to_csv with dataframe input.

    - make certain the filename is deterministic
    - make certain the file contents match the data.
    """
    # Every path reported by ``to_csv`` is recorded as soon as it is known, so
    # cleanup also happens when a later step raises or the two names differ.
    written: set[str] = set()
    data = _create_dataframe(10, tp=tp)
    try:
        result1 = _pipe(data, to_csv)
        written.add(result1["url"])
        result2 = _pipe(data, to_csv)
        written.add(result2["url"])
        # Read back through pandas, then rebuild with the frame type under test.
        output = tp(pd.read_csv(result1["url"]))
    finally:
        for name in written:
            # ``missing_ok`` keeps a cleanup problem from masking the real error.
            Path(name).unlink(missing_ok=True)
    assert result1 == result2
    assert output.equals(data)
def test_deprecation_decorator():
    """
    Check that calling a ``@deprecated`` function warns and still returns its result.

    The warning message must mention the version (``altair=999``) and the
    suggested alternative (``func_12345 instead``).
    """

    @deprecated(version="999", alternative="func_12345")
    def func(x):
        return x + 1

    expected_message = r"altair=999.+func_12345 instead"
    warning_check = pytest.warns(AltairDeprecationWarning, match=expected_message)
    with warning_check:
        outcome = func(1)
    assert outcome == 2
def test_eval_block_with_return():
    """
    Check ``eval_block`` on a block that ends with an expression.

    The value of the trailing expression (``3 * y``) is returned, and the
    names assigned by the block are left in the supplied globals dict.
    """
    namespace = {}
    assert eval_block(HAS_RETURN, namespace) == 24
    # Assignments made by the block must be visible to the caller
    assert (namespace["x"], namespace["y"]) == (4, 8)