[
  {
    "path": ".dvc/.gitignore",
    "content": "/config.local\n/tmp\n/cache\n"
  },
  {
    "path": ".dvc/config",
    "content": ""
  },
  {
    "path": ".dvcignore",
    "content": "/tests\n/dvc\n/.github\n"
  },
  {
    "path": ".git-blame-ignore-revs",
    "content": "# The following commits will be ignored by the GitHub blame view.\n# You can configure `git blame` to use this file as the default ignore file:\n#     git config blame.ignoreRevsFile .git-blame-ignore-revs\n# or, you can also manually ignore commits from this file by doing:\n#     git blame --ignore-revs-file .git-blame-ignore-revs <filepath>\n# see the `blame.markIgnoredLines` and `blame.markUnblamableLines` options as well.\n# Refer to: https://git-scm.com/docs/git-blame#Documentation/git-blame.txt---ignore-revs-fileltfilegt\n\n# update black to 2023 stable style and change line-length to 88\na86470ccbc693d6f0a4f8066cbd1357e5191c4ff\n"
  },
  {
    "path": ".git_archival.txt",
    "content": "node: $Format:%H$\nnode-date: $Format:%cI$\ndescribe-name: $Format:%(describe:tags=true)$\nref-names: $Format:%D$\n"
  },
  {
    "path": ".gitattributes",
    "content": ".git_archival.txt  export-subst\n"
  },
  {
    "path": ".github/.test_durations",
    "content": "{\r\n    \"tests/func/api/test_artifacts.py::test_artifacts_show[]\": 0.6690752999999994,\r\n    \"tests/func/api/test_artifacts.py::test_artifacts_show[sub]\": 1.3406024999999993,\r\n    \"tests/func/api/test_artifacts.py::test_artifacts_show_subrepo[]\": 0.7644723000000031,\r\n    \"tests/func/api/test_artifacts.py::test_artifacts_show_subrepo[sub]\": 0.8369741000000008,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[current-cache]\": 0.7119634999999978,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[current-no_cache]\": 0.7142162999999968,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[git-cache]\": 0.8079615999999987,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[git-no_cache]\": 1.2323302999999974,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[git_rev-cache]\": 1.436392699999999,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[git_rev-no_cache]\": 1.2994606999999974,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[local-cache]\": 0.7356022999999965,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[local-no_cache]\": 1.1919572000000045,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[local_rev-cache]\": 0.8559640000000037,\r\n    \"tests/func/api/test_data.py::TestAPI::test_filesystem[local_rev-no_cache]\": 0.713584400000002,\r\n    \"tests/func/api/test_data.py::TestAPI::test_get_url\": 0.5835055000000011,\r\n    \"tests/func/api/test_data.py::TestAPI::test_open\": 0.6204048000000011,\r\n    \"tests/func/api/test_data.py::test_api_missing_local_cache_exists_on_remote[file-False]\": 0.6519310000000011,\r\n    \"tests/func/api/test_data.py::test_api_missing_local_cache_exists_on_remote[file-True]\": 0.7536261999999994,\r\n    \"tests/func/api/test_data.py::test_api_missing_local_cache_exists_on_remote[inside-dir-False]\": 0.7582351000000003,\r\n    
\"tests/func/api/test_data.py::test_api_missing_local_cache_exists_on_remote[inside-dir-True]\": 0.8035712000000004,\r\n    \"tests/func/api/test_data.py::test_get_url_external\": 1.0128078999999985,\r\n    \"tests/func/api/test_data.py::test_get_url_from_remote\": 0.8396529000000044,\r\n    \"tests/func/api/test_data.py::test_get_url_granular\": 0.6913349999999951,\r\n    \"tests/func/api/test_data.py::test_get_url_requires_dvc\": 0.34330409999999745,\r\n    \"tests/func/api/test_data.py::test_get_url_subrepos\": 1.2583151999999984,\r\n    \"tests/func/api/test_data.py::test_missing\": 0.4301550999999968,\r\n    \"tests/func/api/test_data.py::test_open_external\": 1.0483930000000008,\r\n    \"tests/func/api/test_data.py::test_open_from_remote\": 1.031453599999999,\r\n    \"tests/func/api/test_data.py::test_open_granular\": 0.47627430000000004,\r\n    \"tests/func/api/test_data.py::test_open_not_cached\": 0.4482658999999991,\r\n    \"tests/func/api/test_data.py::test_open_rev\": 0.2935205000000032,\r\n    \"tests/func/api/test_data.py::test_open_scm_controlled\": 0.2779954000000018,\r\n    \"tests/func/api/test_data.py::test_read_from_remote\": 1.1489083000000022,\r\n    \"tests/func/api/test_data.py::test_read_with_subrepos[False]\": 1.9086240000000103,\r\n    \"tests/func/api/test_data.py::test_read_with_subrepos[True]\": 1.2316525000000027,\r\n    \"tests/func/api/test_experiments.py::test_exp_save\": 1.126176700000002,\r\n    \"tests/func/api/test_experiments.py::test_exp_show\": 0.7599229999999935,\r\n    \"tests/func/api/test_scm.py::test_all_branches\": 0.36340509999998716,\r\n    \"tests/func/api/test_scm.py::test_all_commits\": 0.9080075000000036,\r\n    \"tests/func/api/test_scm.py::test_all_tags\": 0.32061440000000374,\r\n    \"tests/func/api/test_show.py::test_metrics_show_dirty_working_dir\": 1.962079800000005,\r\n    \"tests/func/api/test_show.py::test_metrics_show_no_args\": 1.889685600000007,\r\n    
\"tests/func/api/test_show.py::test_metrics_show_no_metrics_found\": 0.28593630000000303,\r\n    \"tests/func/api/test_show.py::test_metrics_show_rev_with_metrics\": 2.2642164000000022,\r\n    \"tests/func/api/test_show.py::test_metrics_show_rev_without_metrics\": 1.8105644999999981,\r\n    \"tests/func/api/test_show.py::test_metrics_show_targets\": 2.152608200000003,\r\n    \"tests/func/api/test_show.py::test_params_show_deps\": 0.7593096000000017,\r\n    \"tests/func/api/test_show.py::test_params_show_no_args\": 0.8288500999999968,\r\n    \"tests/func/api/test_show.py::test_params_show_no_params_found\": 0.42213559999999717,\r\n    \"tests/func/api/test_show.py::test_params_show_repo\": 0.5701109999999971,\r\n    \"tests/func/api/test_show.py::test_params_show_revs\": 0.7689407999999958,\r\n    \"tests/func/api/test_show.py::test_params_show_stage_addressing\": 0.5756270999999984,\r\n    \"tests/func/api/test_show.py::test_params_show_stage_without_params\": 0.32598780000000005,\r\n    \"tests/func/api/test_show.py::test_params_show_stages\": 1.3495361999999957,\r\n    \"tests/func/api/test_show.py::test_params_show_targets\": 1.131466500000002,\r\n    \"tests/func/api/test_show.py::test_params_show_untracked_target\": 0.874395799999995,\r\n    \"tests/func/api/test_show.py::test_params_show_while_running_stage\": 0.9435678999999979,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_add_abspath\": 0.1416759999999968,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_add_fails_on_dvc_subrepo\": 0.1239182999999926,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_add_subdir\": 0.1774754999999999,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_download[]\": 0.9987292999999937,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_download[sub]\": 1.0444975999999997,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_download_studio\": 0.13496169999999097,\r\n    
\"tests/func/artifacts/test_artifacts.py::test_artifacts_download_subrepo[]\": 1.1816100999999861,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_download_subrepo[sub]\": 1.5492141000000004,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_read_bad_name\": 0.13257700000000483,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_read_fails_on_id_duplication\": 0.12151710000000548,\r\n    \"tests/func/artifacts/test_artifacts.py::test_artifacts_read_subdir\": 0.14055419999999685,\r\n    \"tests/func/artifacts/test_artifacts.py::test_broken_dvcyaml_extra_field[bad_dvcyaml0]\": 0.14516059999999698,\r\n    \"tests/func/artifacts/test_artifacts.py::test_broken_dvcyaml_extra_field[bad_dvcyaml1]\": 0.12352989999997988,\r\n    \"tests/func/artifacts/test_artifacts.py::test_get_path\": 0.19403669999999806,\r\n    \"tests/func/artifacts/test_artifacts.py::test_get_path_subrepo\": 0.603105400000004,\r\n    \"tests/func/artifacts/test_artifacts.py::test_get_rev\": 0.22633340000000146,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible[1]\": 0.0032751000000104113,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible[1nn]\": 0.0029597000000052276,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible[m1]\": 0.0030679999999847496,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible[m]\": 0.0034961999999865156,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible[model-prod-v1]\": 0.0029481000000117774,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible[model-prod]\": 0.003120100000003845,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible[nn]\": 0.003020700000007537,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[###]\": 0.0029940000000010514,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[-model]\": 0.0030767999999881113,\r\n    
\"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[/m]\": 0.003051200000001586,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[@@@]\": 0.0030206000000134736,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[@namespace/model]\": 0.0036754999999999427,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[]\": 0.003089799999997922,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[a model]\": 0.003036399999984951,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[m/]\": 0.003116800000000808,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[model#1]\": 0.0030410000000102855,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[model-]\": 0.0030069999999966512,\r\n    \"tests/func/artifacts/test_artifacts.py::test_name_is_compatible_fails[model@1]\": 0.003050500000000511,\r\n    \"tests/func/artifacts/test_artifacts.py::test_parametrized\": 0.29747580000001506,\r\n    \"tests/func/data/db/test_index.py::test_clear_on_download_err\": 0.4539134000000189,\r\n    \"tests/func/data/db/test_index.py::test_clear_on_gc\": 0.43299960000000226,\r\n    \"tests/func/data/db/test_index.py::test_indexed_dir_missing\": 0.33903929999999605,\r\n    \"tests/func/data/db/test_index.py::test_indexed_on_push\": 0.45842240000001766,\r\n    \"tests/func/data/db/test_index.py::test_indexed_on_status\": 0.47928760000000636,\r\n    \"tests/func/data/db/test_index.py::test_partial_upload\": 0.5441733999999911,\r\n    \"tests/func/experiments/test_apply.py::test_apply\": 3.0489364999999964,\r\n    \"tests/func/experiments/test_apply.py::test_apply_failed\": 0.7756726000000072,\r\n    \"tests/func/experiments/test_apply.py::test_apply_queued\": 1.3775269999999864,\r\n    \"tests/func/experiments/test_apply.py::test_apply_unchanged_head\": 2.3483619999999803,\r\n    
\"tests/func/experiments/test_apply.py::test_apply_untracked\": 1.7779260000000079,\r\n    \"tests/func/experiments/test_diff.py::test_diff_empty\": 0.6685833000000088,\r\n    \"tests/func/experiments/test_diff.py::test_diff_exp\": 2.169962599999991,\r\n    \"tests/func/experiments/test_diff.py::test_diff_head\": 1.3638808000000182,\r\n    \"tests/func/experiments/test_experiments.py::test_branch\": 2.1533368000000053,\r\n    \"tests/func/experiments/test_experiments.py::test_checkout_targets_deps\": 1.6357962000000157,\r\n    \"tests/func/experiments/test_experiments.py::test_clean\": 0.1494907000000012,\r\n    \"tests/func/experiments/test_experiments.py::test_copy_paths[False]\": 0.8107860000000358,\r\n    \"tests/func/experiments/test_experiments.py::test_copy_paths[True]\": 1.135548200000045,\r\n    \"tests/func/experiments/test_experiments.py::test_copy_paths_errors\": 0.9207782000000009,\r\n    \"tests/func/experiments/test_experiments.py::test_custom_commit_message[False]\": 0.9768105000000276,\r\n    \"tests/func/experiments/test_experiments.py::test_custom_commit_message[True]\": 1.2530415000000232,\r\n    \"tests/func/experiments/test_experiments.py::test_detached_parent\": 1.9396536999999938,\r\n    \"tests/func/experiments/test_experiments.py::test_exp_run_recursive\": 1.1496640000000298,\r\n    \"tests/func/experiments/test_experiments.py::test_experiment_exists[False]\": 2.698194200000003,\r\n    \"tests/func/experiments/test_experiments.py::test_experiment_exists[True]\": 2.0226000000000255,\r\n    \"tests/func/experiments/test_experiments.py::test_experiment_name_invalid\": 0.43865630000001943,\r\n    \"tests/func/experiments/test_experiments.py::test_experiment_no_commit\": 0.36042209999999386,\r\n    \"tests/func/experiments/test_experiments.py::test_experiment_run_dry\": 1.1187728000000448,\r\n    \"tests/func/experiments/test_experiments.py::test_experiment_unchanged\": 1.5600346999999886,\r\n    
\"tests/func/experiments/test_experiments.py::test_experiments_workspace_not_log_exception\": 0.4825525000000255,\r\n    \"tests/func/experiments/test_experiments.py::test_failed_exp_workspace\": 0.8329308999999938,\r\n    \"tests/func/experiments/test_experiments.py::test_file_permissions\": 0.0012947000000025355,\r\n    \"tests/func/experiments/test_experiments.py::test_fix_exp_head[]\": 0.025993000000028132,\r\n    \"tests/func/experiments/test_experiments.py::test_fix_exp_head[^]\": 0.030619200000018054,\r\n    \"tests/func/experiments/test_experiments.py::test_fix_exp_head[~1]\": 0.04737730000002216,\r\n    \"tests/func/experiments/test_experiments.py::test_get_baseline\": 2.717492400000012,\r\n    \"tests/func/experiments/test_experiments.py::test_list\": 3.138903099999993,\r\n    \"tests/func/experiments/test_experiments.py::test_local_config_is_propagated_to_tmp\": 0.9434146000000396,\r\n    \"tests/func/experiments/test_experiments.py::test_mixed_git_dvc_out\": 1.5312878000000012,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[False-foo: 1-False]\": 2.0527747999999804,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[False-foo: 1-True]\": 2.294647700000013,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[False-foo: 2-False]\": 2.230488899999955,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[False-foo: 2-True]\": 1.8093015999999693,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[True-foo: 1-False]\": 1.7912287000000333,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[True-foo: 1-True]\": 1.4503267999999991,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[True-foo: 2-False]\": 1.679704700000002,\r\n    \"tests/func/experiments/test_experiments.py::test_modified_data_dep[True-foo: 2-True]\": 1.8077306999999792,\r\n    
\"tests/func/experiments/test_experiments.py::test_new_simple[False-None]\": 1.7895829999999933,\r\n    \"tests/func/experiments/test_experiments.py::test_new_simple[False-foo]\": 1.5720811999999995,\r\n    \"tests/func/experiments/test_experiments.py::test_new_simple[True-None]\": 3.114446799999996,\r\n    \"tests/func/experiments/test_experiments.py::test_new_simple[True-foo]\": 1.5187964999999934,\r\n    \"tests/func/experiments/test_experiments.py::test_no_scm\": 0.254213099999987,\r\n    \"tests/func/experiments/test_experiments.py::test_packed_args_exists\": 1.1008666000000034,\r\n    \"tests/func/experiments/test_experiments.py::test_run_celery\": 10.300323799999973,\r\n    \"tests/func/experiments/test_experiments.py::test_run_env\": 1.2357240000000047,\r\n    \"tests/func/experiments/test_experiments.py::test_subdir[False]\": 1.5404146000000196,\r\n    \"tests/func/experiments/test_experiments.py::test_subdir[True]\": 1.3069993000000153,\r\n    \"tests/func/experiments/test_experiments.py::test_subrepo[False]\": 2.0550482000000017,\r\n    \"tests/func/experiments/test_experiments.py::test_subrepo[True]\": 1.5268956000000031,\r\n    \"tests/func/experiments/test_experiments.py::test_untracked[False]\": 1.8188596999999902,\r\n    \"tests/func/experiments/test_experiments.py::test_untracked[True]\": 1.2779773999999975,\r\n    \"tests/func/experiments/test_experiments.py::test_untracked_top_level_files_are_included_in_exp[False]\": 0.7230959000000041,\r\n    \"tests/func/experiments/test_experiments.py::test_untracked_top_level_files_are_included_in_exp[True]\": 0.9551126000000068,\r\n    \"tests/func/experiments/test_experiments.py::test_update_py_params\": 3.051115799999991,\r\n    \"tests/func/experiments/test_queue.py::test_celery_logs[False]\": 6.378822700000001,\r\n    \"tests/func/experiments/test_queue.py::test_celery_logs[True]\": 4.388742000000036,\r\n    \"tests/func/experiments/test_queue.py::test_copy_paths_queue\": 5.5122883999999885,\r\n    
\"tests/func/experiments/test_queue.py::test_custom_commit_message_queue\": 5.6898801000000105,\r\n    \"tests/func/experiments/test_queue.py::test_queue_doesnt_remove_untracked_params_file\": 0.5072050999999931,\r\n    \"tests/func/experiments/test_remote.py::test_auth_error_list\": 0.14862869999998907,\r\n    \"tests/func/experiments/test_remote.py::test_auth_error_pull\": 0.15549210000000357,\r\n    \"tests/func/experiments/test_remote.py::test_auth_error_push\": 1.2056490000000224,\r\n    \"tests/func/experiments/test_remote.py::test_auto_push_misconfigured\": 2.3435677000000226,\r\n    \"tests/func/experiments/test_remote.py::test_auto_push_on_run[False-success]\": 2.4090233999999953,\r\n    \"tests/func/experiments/test_remote.py::test_auto_push_on_run[True-up_to_date]\": 2.7894929000000275,\r\n    \"tests/func/experiments/test_remote.py::test_auto_push_on_save[False-success]\": 2.384592700000013,\r\n    \"tests/func/experiments/test_remote.py::test_auto_push_on_save[True-up_to_date]\": 2.4866651000000104,\r\n    \"tests/func/experiments/test_remote.py::test_auto_push_tmp_dir\": 2.711254000000025,\r\n    \"tests/func/experiments/test_remote.py::test_get[False]\": 1.6720234999999946,\r\n    \"tests/func/experiments/test_remote.py::test_get[True]\": 1.5247991999999613,\r\n    \"tests/func/experiments/test_remote.py::test_list_remote[False]\": 3.433762700000017,\r\n    \"tests/func/experiments/test_remote.py::test_list_remote[True]\": 3.2126294999999914,\r\n    \"tests/func/experiments/test_remote.py::test_pull[False]\": 3.0422385000000247,\r\n    \"tests/func/experiments/test_remote.py::test_pull[True]\": 3.176392099999987,\r\n    \"tests/func/experiments/test_remote.py::test_pull_ambiguous_name\": 2.3352716999999927,\r\n    \"tests/func/experiments/test_remote.py::test_pull_args[False-True-None]\": 2.8758509999999546,\r\n    \"tests/func/experiments/test_remote.py::test_pull_args[True-False-True]\": 2.8549404000000322,\r\n    
\"tests/func/experiments/test_remote.py::test_pull_diverged\": 1.5577454999999816,\r\n    \"tests/func/experiments/test_remote.py::test_pull_multi_rev\": 2.8295723000000237,\r\n    \"tests/func/experiments/test_remote.py::test_push[False]\": 3.469447900000006,\r\n    \"tests/func/experiments/test_remote.py::test_push[True]\": 3.1035360999999853,\r\n    \"tests/func/experiments/test_remote.py::test_push_ambiguous_name\": 2.7821596,\r\n    \"tests/func/experiments/test_remote.py::test_push_args[False-True-None]\": 3.2440407999999934,\r\n    \"tests/func/experiments/test_remote.py::test_push_args[True-False-True]\": 3.3222015999999996,\r\n    \"tests/func/experiments/test_remote.py::test_push_diverged\": 1.8917133999999578,\r\n    \"tests/func/experiments/test_remote.py::test_push_multi_rev\": 3.2379070999999726,\r\n    \"tests/func/experiments/test_remote.py::test_push_pull_invalid_workspace\": 1.779622499999988,\r\n    \"tests/func/experiments/test_remove.py::test_remove_all\": 2.2928706999999804,\r\n    \"tests/func/experiments/test_remove.py::test_remove_all_queued_experiments\": 1.9678789999999822,\r\n    \"tests/func/experiments/test_remove.py::test_remove_experiments_by_ref\": 3.0080556000000342,\r\n    \"tests/func/experiments/test_remove.py::test_remove_experiments_by_rev\": 2.64246559999998,\r\n    \"tests/func/experiments/test_remove.py::test_remove_multi_rev\": 2.2034240999999497,\r\n    \"tests/func/experiments/test_remove.py::test_remove_remote[False]\": 3.372327699999971,\r\n    \"tests/func/experiments/test_remove.py::test_remove_remote[True]\": 3.4991858999999863,\r\n    \"tests/func/experiments/test_remove.py::test_remove_special_queued_experiments\": 2.8150486000000114,\r\n    \"tests/func/experiments/test_rename.py::test_existing_name\": 1.892558500000007,\r\n    \"tests/func/experiments/test_rename.py::test_invalid_name\": 1.0761679999999956,\r\n    \"tests/func/experiments/test_rename.py::test_rename_experiment_by_name\": 1.1512879999999655,\r\n  
  \"tests/func/experiments/test_rename.py::test_same_name\": 1.0524115000000052,\r\n    \"tests/func/experiments/test_save.py::test_exp_save[None]\": 0.756629299999986,\r\n    \"tests/func/experiments/test_save.py::test_exp_save[test]\": 0.9682786999999848,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_after_commit\": 1.3312111000000186,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_custom_message\": 1.0982238999999936,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_include_untracked\": 0.77491729999997,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_include_untracked_force\": 0.8746251999999686,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_include_untracked_warning\": 0.9865022000000181,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_invalid_name[invalid..name]\": 0.43050610000000233,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_invalid_name[invalid/name]\": 0.37493930000005093,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_invalid_name[invalid?name]\": 0.3782524000000649,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_invalid_name[invalidname.]\": 0.4567057000000432,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_invalid_name[invalid~name]\": 0.37448059999996985,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_overwrite_experiment\": 1.2816242999999758,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_target\": 0.9417426999999634,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_unchanged\": 0.8742640000000392,\r\n    \"tests/func/experiments/test_save.py::test_exp_save_with_staged_changes\": 0.9713927999999896,\r\n    \"tests/func/experiments/test_save.py::test_untracked_dvclock_is_included_in_exp\": 0.7397634000000153,\r\n    \"tests/func/experiments/test_save.py::test_untracked_top_level_files_are_included_in_exp\": 0.8345099999999661,\r\n    
\"tests/func/experiments/test_set_params.py::test_hydra_compose_and_dump[None-None-False]\": 1.3278675000000248,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_compose_and_dump[None-None-True]\": 2.344593800000041,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_compose_and_dump[None-bar-False]\": 1.5483196000000703,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_compose_and_dump[None-bar-True]\": 2.2510721000000444,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_compose_and_dump[conf-bar-False]\": 1.3779260000000022,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_compose_and_dump[conf-bar-True]\": 2.2169706000000815,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_sweep[False-overrides1-expected1]\": 0.3220042000000376,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_sweep[False-overrides2-expected2]\": 0.372244099999989,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_sweep[True-overrides0-expected0]\": 0.329921899999988,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_sweep_prefix_name\": 0.7365541000000349,\r\n    \"tests/func/experiments/test_set_params.py::test_hydra_sweep_requires_queue\": 0.3379746999999611,\r\n    \"tests/func/experiments/test_set_params.py::test_modify_params[changes0-foo: baz\\\\ngoo:\\\\n  bag: 3.0\\\\nlorem: false]\": 0.8577662999999802,\r\n    \"tests/func/experiments/test_set_params.py::test_modify_params[changes1-foo: baz\\\\ngoo:\\\\n  bag: 3.0\\\\nlorem: false]\": 0.88462659999999,\r\n    \"tests/func/experiments/test_show.py::test_metrics_renaming\": 1.9396978000000331,\r\n    \"tests/func/experiments/test_show.py::test_show_baseline_error\": 1.3798479000000157,\r\n    \"tests/func/experiments/test_show.py::test_show_branch_and_tag_name\": 0.8210886000000528,\r\n    \"tests/func/experiments/test_show.py::test_show_completed_error\": 2.197091100000023,\r\n    
\"tests/func/experiments/test_show.py::test_show_csv\": 3.597721500000034,\r\n    \"tests/func/experiments/test_show.py::test_show_experiment[False]\": 2.177178099999992,\r\n    \"tests/func/experiments/test_show.py::test_show_experiment[True]\": 1.7142262000000414,\r\n    \"tests/func/experiments/test_show.py::test_show_failed_experiment\": 6.400676799999985,\r\n    \"tests/func/experiments/test_show.py::test_show_filter\": 1.0972039999999765,\r\n    \"tests/func/experiments/test_show.py::test_show_multiple_commits\": 1.5359957999999665,\r\n    \"tests/func/experiments/test_show.py::test_show_only_changed\": 1.7540200000000823,\r\n    \"tests/func/experiments/test_show.py::test_show_outs\": 1.5832652999999368,\r\n    \"tests/func/experiments/test_show.py::test_show_queued\": 1.3007974999999874,\r\n    \"tests/func/experiments/test_show.py::test_show_queued_error\": 1.2482168999999885,\r\n    \"tests/func/experiments/test_show.py::test_show_running[TaskStatus.FAILED-False]\": 0.6858837999999423,\r\n    \"tests/func/experiments/test_show.py::test_show_running[TaskStatus.RUNNING-False]\": 0.6576839000000518,\r\n    \"tests/func/experiments/test_show.py::test_show_running[TaskStatus.RUNNING-True]\": 0.9630088000000114,\r\n    \"tests/func/experiments/test_show.py::test_show_simple\": 0.6424460000000067,\r\n    \"tests/func/experiments/test_show.py::test_show_sort\": 1.8322294999999826,\r\n    \"tests/func/experiments/test_show.py::test_show_sort_metric_sep\": 0.9055835999999999,\r\n    \"tests/func/experiments/test_show.py::test_show_sorted_deps\": 0.4166311000000178,\r\n    \"tests/func/experiments/test_show.py::test_show_with_broken_repo\": 1.5486077000000478,\r\n    \"tests/func/experiments/test_stash_exp.py::test_deleted[False-False]\": 0.7941452000000595,\r\n    \"tests/func/experiments/test_stash_exp.py::test_deleted[False-True]\": 1.0229252999999972,\r\n    \"tests/func/experiments/test_stash_exp.py::test_deleted[True-False]\": 0.8553899000000342,\r\n    
\"tests/func/experiments/test_stash_exp.py::test_deleted[True-True]\": 0.9744108999999526,\r\n    \"tests/func/experiments/test_stash_exp.py::test_modified[False-False]\": 0.9669892000000004,\r\n    \"tests/func/experiments/test_stash_exp.py::test_modified[False-True]\": 1.1569672999999625,\r\n    \"tests/func/experiments/test_stash_exp.py::test_modified[True-False]\": 0.8601257999999916,\r\n    \"tests/func/experiments/test_stash_exp.py::test_modified[True-True]\": 1.2164980000000014,\r\n    \"tests/func/experiments/test_stash_exp.py::test_staged_new_file[False]\": 0.7563609999999699,\r\n    \"tests/func/experiments/test_stash_exp.py::test_staged_new_file[True]\": 1.1449452999999608,\r\n    \"tests/func/experiments/test_utils.py::test_generate_random_exp_name\": 2.6529195999999615,\r\n    \"tests/func/metrics/test_diff.py::test_diff_top_level_metrics[dir/dvc.yaml-..\\\\\\\\my_metrics.yaml]\": 0.3479751000000988,\r\n    \"tests/func/metrics/test_diff.py::test_diff_top_level_metrics[dir/dvc.yaml-my_metrics.yaml]\": 0.4379244000000426,\r\n    \"tests/func/metrics/test_diff.py::test_diff_top_level_metrics[dvc.yaml-my_metrics.yaml]\": 0.4825845999999956,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_active_branch_unchanged\": 1.0561294999999973,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_broken_json\": 0.7592303000000129,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_cli\": 1.1395653000000152,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_deleted_metric\": 0.5902245999999423,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_dirty\": 1.0837390000000937,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_json\": 1.1032326000000126,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_json_unchanged\": 1.0659378999999376,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_new_metric\": 0.5472320000000082,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_no_metrics\": 
0.2914406999999528,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_non_metrics\": 0.5010075999999799,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_simple\": 1.1088795000000005,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_with_unchanged\": 0.6916975999999977,\r\n    \"tests/func/metrics/test_diff.py::test_metrics_diff_yaml\": 1.105636499999946,\r\n    \"tests/func/metrics/test_diff.py::test_no_commits\": 0.3156121999999755,\r\n    \"tests/func/metrics/test_show.py::test_cached_metrics\": 0.7686137999999687,\r\n    \"tests/func/metrics/test_show.py::test_log_errors[dvc.yaml-error_path0-YAMLSyntaxError]\": 0.5792332000000329,\r\n    \"tests/func/metrics/test_show.py::test_log_errors[metrics.yaml-error_path1-YAMLFileCorruptedError]\": 0.6297549999999887,\r\n    \"tests/func/metrics/test_show.py::test_metric_in_a_tracked_directory_with_missing_dir_file\": 0.3644590999999764,\r\n    \"tests/func/metrics/test_show.py::test_metrics_show_no_target\": 0.1800465999999119,\r\n    \"tests/func/metrics/test_show.py::test_metrics_show_overlap[False]\": 0.001055300000018633,\r\n    \"tests/func/metrics/test_show.py::test_metrics_show_overlap[True]\": 0.001104099999963637,\r\n    \"tests/func/metrics/test_show.py::test_missing_cache\": 0.6544150999999374,\r\n    \"tests/func/metrics/test_show.py::test_non_metric_and_dir_show\": 0.44453069999997297,\r\n    \"tests/func/metrics/test_show.py::test_show\": 0.3920631999999955,\r\n    \"tests/func/metrics/test_show.py::test_show_branch\": 0.6811390999999958,\r\n    \"tests/func/metrics/test_show.py::test_show_falsey\": 0.20822529999998096,\r\n    \"tests/func/metrics/test_show.py::test_show_malformed_metric\": 0.2597896999999989,\r\n    \"tests/func/metrics/test_show.py::test_show_multiple\": 0.5938720999999987,\r\n    \"tests/func/metrics/test_show.py::test_show_no_metrics_files\": 0.1196421999999302,\r\n    \"tests/func/metrics/test_show.py::test_show_no_repo\": 0.0665103999999701,\r\n    
\"tests/func/metrics/test_show.py::test_show_non_metric[False]\": 0.10242660000000114,\r\n    \"tests/func/metrics/test_show.py::test_show_non_metric[True]\": 0.37919599999992215,\r\n    \"tests/func/metrics/test_show.py::test_show_non_metric_branch[False]\": 0.24209780000001047,\r\n    \"tests/func/metrics/test_show.py::test_show_non_metric_branch[True]\": 0.5168121000000383,\r\n    \"tests/func/metrics/test_show.py::test_show_simple\": 0.3916608999999198,\r\n    \"tests/func/metrics/test_show.py::test_show_simple_from_subdir\": 0.4041560000000004,\r\n    \"tests/func/metrics/test_show.py::test_show_subrepo_with_preexisting_tags\": 0.8456790999999839,\r\n    \"tests/func/metrics/test_show.py::test_show_targets\": 0.5895505000000298,\r\n    \"tests/func/metrics/test_show.py::test_show_toml\": 0.40270450000008395,\r\n    \"tests/func/metrics/test_show.py::test_top_level_parametrized\": 0.36376849999999195,\r\n    \"tests/func/params/test_diff.py::test_diff\": 0.49012349999992466,\r\n    \"tests/func/params/test_diff.py::test_diff_active_branch_no_changes\": 0.373032999999964,\r\n    \"tests/func/params/test_diff.py::test_diff_deleted\": 0.38685540000000174,\r\n    \"tests/func/params/test_diff.py::test_diff_dict\": 0.38326520000009623,\r\n    \"tests/func/params/test_diff.py::test_diff_dirty\": 0.4405797000000007,\r\n    \"tests/func/params/test_diff.py::test_diff_list\": 0.413541500000008,\r\n    \"tests/func/params/test_diff.py::test_diff_new\": 0.31764760000004344,\r\n    \"tests/func/params/test_diff.py::test_diff_no_changes\": 0.3898240999999416,\r\n    \"tests/func/params/test_diff.py::test_diff_no_params\": 0.33827360000003637,\r\n    \"tests/func/params/test_diff.py::test_diff_targeted\": 0.9226150999999732,\r\n    \"tests/func/params/test_diff.py::test_diff_top_level_params[dir/dvc.yaml-..\\\\\\\\my_params.yaml]\": 0.39441390000001775,\r\n    \"tests/func/params/test_diff.py::test_diff_top_level_params[dir/dvc.yaml-my_params.yaml]\": 
0.35384809999999334,\r\n    \"tests/func/params/test_diff.py::test_diff_top_level_params[dvc.yaml-my_params.yaml]\": 0.35703069999993886,\r\n    \"tests/func/params/test_diff.py::test_diff_with_unchanged\": 0.47371570000001384,\r\n    \"tests/func/params/test_diff.py::test_diff_without_targets_specified[other_params.yaml]\": 0.3682619999999588,\r\n    \"tests/func/params/test_diff.py::test_diff_without_targets_specified[params.yaml]\": 0.3944649000000595,\r\n    \"tests/func/params/test_diff.py::test_no_commits\": 0.3962024000000497,\r\n    \"tests/func/params/test_diff.py::test_pipeline_tracked_params\": 0.791154199999994,\r\n    \"tests/func/params/test_diff.py::test_vars_shows_on_params_diff\": 0.5142563999999652,\r\n    \"tests/func/params/test_show.py::test_cached_params\": 0.670684299999948,\r\n    \"tests/func/params/test_show.py::test_deps_multi_stage\": 0.8328005999999846,\r\n    \"tests/func/params/test_show.py::test_deps_with_targets\": 0.7908949000000121,\r\n    \"tests/func/params/test_show.py::test_param_in_a_tracked_directory_with_missing_dir_file\": 0.286612899999966,\r\n    \"tests/func/params/test_show.py::test_pipeline_params\": 0.7957324000000199,\r\n    \"tests/func/params/test_show.py::test_show\": 0.24756000000002132,\r\n    \"tests/func/params/test_show.py::test_show_branch\": 0.7055480000000216,\r\n    \"tests/func/params/test_show.py::test_show_empty\": 0.18992719999999963,\r\n    \"tests/func/params/test_show.py::test_show_list\": 0.2911527999999066,\r\n    \"tests/func/params/test_show.py::test_show_multiple\": 0.3037872000000448,\r\n    \"tests/func/params/test_show.py::test_show_no_repo\": 0.06914749999998548,\r\n    \"tests/func/params/test_show.py::test_show_py\": 0.22990459999999757,\r\n    \"tests/func/params/test_show.py::test_show_targets\": 0.25046029999992925,\r\n    \"tests/func/params/test_show.py::test_show_toml\": 0.2446489000000156,\r\n    
\"tests/func/params/test_show.py::test_show_without_targets_specified[other_params.yaml]\": 0.27128309999994826,\r\n    \"tests/func/params/test_show.py::test_show_without_targets_specified[params.yaml]\": 0.27053269999998975,\r\n    \"tests/func/params/test_show.py::test_top_level_parametrized\": 0.19129430000003822,\r\n    \"tests/func/parsing/test_errors.py::test_failed_to_interpolate\": 0.18144300000000158,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_data_expects_list_or_dict[${dct.model1}]\": 0.12848279999997203,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_data_expects_list_or_dict[${foo}]\": 0.15648720000001504,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_data_expects_list_or_dict[${lst.0}]\": 0.1442303000000038,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_data_expects_list_or_dict[foobar]\": 0.1321871000000101,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_data_key_does_not_exists[modelss.123]\": 0.12942199999997683,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_data_key_does_not_exists[modelss]\": 0.13210100000009106,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_data_syntax_error\": 0.1302176999998892,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_do_definition_item_does_not_exist[foo.bar-stages.build@0.cmd]\": 0.1371279000000527,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_do_definition_item_does_not_exist[item.thresh-stages.build@1.cmd]\": 0.14251179999996566,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_do_syntax_errors\": 0.13003960000008874,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_overwriting_item_in_list[global_data0-item and key are]\": 0.12448729999994157,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_overwriting_item_in_list[global_data1-item is]\": 0.1222735000000057,\r\n    \"tests/func/parsing/test_errors.py::test_foreach_overwriting_item_in_list[global_data2-key is]\": 0.14229789999996,\r\n    
\"tests/func/parsing/test_errors.py::test_foreach_wdir_key_does_not_exist\": 0.13372600000002421,\r\n    \"tests/func/parsing/test_errors.py::test_interpolate_nested_iterable\": 0.14655879999997978,\r\n    \"tests/func/parsing/test_errors.py::test_interpolate_non_string\": 0.13041680000003453,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[False-redefine0]\": 0.13570640000000367,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[False-redefine1]\": 0.13289270000001352,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[False-redefine2]\": 0.1272615999999971,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[False-redefine3]\": 0.1316560999999865,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[True-redefine0]\": 0.14314220000005662,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[True-redefine1]\": 0.14537790000002815,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[True-redefine2]\": 0.13925029999995786,\r\n    \"tests/func/parsing/test_errors.py::test_item_key_in_generated_stage_vars[True-redefine3]\": 0.13336179999993192,\r\n    \"tests/func/parsing/test_errors.py::test_local_vars_params_file_not_exist\": 0.20451159999998936,\r\n    \"tests/func/parsing/test_errors.py::test_partial_vars_doesnot_exist\": 0.13008860000002187,\r\n    \"tests/func/parsing/test_errors.py::test_specified_key_does_not_exist\": 0.1631060000000275,\r\n    \"tests/func/parsing/test_errors.py::test_vars_interpolation_errors[${file}_params.yaml]\": 0.1693879000000038,\r\n    \"tests/func/parsing/test_errors.py::test_vars_interpolation_errors[vars_1]\": 0.16431169999987105,\r\n    \"tests/func/parsing/test_errors.py::test_wdir_failed_to_interpolate[${models.foo]}-\\\\n${models.foo]}\\\\n            ^\\\\nParseException: Expected end of text, found ']'  (at char 
12), (line:1, col:13)]\": 0.18762429999998176,\r\n    \"tests/func/parsing/test_errors.py::test_wdir_failed_to_interpolate[${models[foobar]}- Could not find 'models.foobar']\": 0.16902380000004769,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_data_from_nested_vars\": 0.17006920000000036,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_data_is_only_resolved_once\": 0.1409080999999901,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_do_syntax_is_checked_once\": 0.1484127999999032,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_interpolate_with_composite_data[foreach_data0-result0-${item.thresh}]\": 0.12445279999991499,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_interpolate_with_composite_data[foreach_data0-result0-${item[thresh]}]\": 0.12519980000007536,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_interpolate_with_composite_data[foreach_data1-result1-${item.thresh}]\": 0.12490900000005922,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_interpolate_with_composite_data[foreach_data1-result1-${item[thresh]}]\": 0.1543631999999775,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_interpolated_simple_list\": 0.12273310000000492,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_partial_interpolations\": 0.19919840000000022,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_with_imported_vars[test_params.yaml:train,prepare]\": 0.15518950000000586,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_with_imported_vars[test_params.yaml:train]\": 0.15083899999990535,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_with_imported_vars[test_params.yaml]\": 0.1749901999999679,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_with_interpolated_wdir\": 0.13871310000001813,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_with_interpolated_wdir_and_local_vars[params.yaml:train,prepare]\": 0.16226059999996778,\r\n    
\"tests/func/parsing/test_foreach.py::test_foreach_with_interpolated_wdir_and_local_vars[params.yaml]\": 0.1652341000000206,\r\n    \"tests/func/parsing/test_foreach.py::test_foreach_with_local_vars\": 0.14472939999996015,\r\n    \"tests/func/parsing/test_foreach.py::test_mixed_vars_for_foreach_data\": 0.1677091000000246,\r\n    \"tests/func/parsing/test_foreach.py::test_mixed_vars_for_foreach_data_2\": 0.17762529999998833,\r\n    \"tests/func/parsing/test_foreach.py::test_params_file_tracked_for_composite_list\": 0.16980849999993097,\r\n    \"tests/func/parsing/test_foreach.py::test_params_file_with_dict_tracked\": 0.19925219999998944,\r\n    \"tests/func/parsing/test_foreach.py::test_with_composite_list\": 0.1260032999999794,\r\n    \"tests/func/parsing/test_foreach.py::test_with_dict_data\": 0.1329804000000081,\r\n    \"tests/func/parsing/test_foreach.py::test_with_dict_with_non_str_keys\": 0.12633909999993875,\r\n    \"tests/func/parsing/test_foreach.py::test_with_simple_list_data\": 0.11791700000003402,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_cmd_dict[None-None]\": 0.1439924000000019,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_cmd_dict[boolean_optional-append]\": 0.14662889999993922,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_cmd_dict[store_true-nargs]\": 0.16324960000002875,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_resolve_local_tries_to_load_globally_used_files\": 0.15116809999994985,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_resolve_local_tries_to_load_globally_used_params_yaml\": 0.13703460000004952,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_simple\": 0.14924290000004703,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_stage_with_wdir\": 0.14364079999995738,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_and_params_import\": 0.1336033000000043,\r\n    
\"tests/func/parsing/test_interpolated_entry.py::test_vars_import\": 0.15960929999999962,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_0-False]\": 0.12724699999995437,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_0-True]\": 0.13362110000008443,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_1-False]\": 0.13219050000003563,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_1-True]\": 0.15331199999997125,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_2-False]\": 0.15744130000001633,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_2-True]\": 0.12270669999998063,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_3-False]\": 0.14442820000004986,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_load_partial[vars_3-True]\": 0.14452160000001868,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_vars_relpath_overwrite\": 0.13569209999997156,\r\n    \"tests/func/parsing/test_interpolated_entry.py::test_with_templated_wdir\": 0.1428108999999722,\r\n    \"tests/func/parsing/test_matrix.py::test_matrix_interpolated[matrix0]\": 0.1593235000000277,\r\n    \"tests/func/parsing/test_matrix.py::test_matrix_interpolated[matrix1]\": 0.18672480000003588,\r\n    \"tests/func/parsing/test_matrix.py::test_matrix_key_present[matrix0]\": 0.16882160000005797,\r\n    \"tests/func/parsing/test_matrix.py::test_matrix_key_present[matrix1]\": 0.19714740000000575,\r\n    \"tests/func/parsing/test_resolver.py::test_default_params_file[vars_0]\": 0.14472480000000587,\r\n    \"tests/func/parsing/test_resolver.py::test_default_params_file[vars_1]\": 0.16231190000002016,\r\n    \"tests/func/parsing/test_resolver.py::test_default_params_file[vars_2]\": 0.13667279999998527,\r\n    
\"tests/func/parsing/test_resolver.py::test_default_params_file_not_exist\": 0.18006049999996776,\r\n    \"tests/func/parsing/test_resolver.py::test_global_overwrite_error_on_imports\": 0.14821919999997135,\r\n    \"tests/func/parsing/test_resolver.py::test_global_overwrite_vars\": 0.13191389999991543,\r\n    \"tests/func/parsing/test_resolver.py::test_load_vars_from_file\": 0.1413463999999749,\r\n    \"tests/func/parsing/test_resolver.py::test_load_vars_with_relpath\": 0.25372550000003,\r\n    \"tests/func/parsing/test_resolver.py::test_local_declared_vars_overwrite\": 0.1364957999999774,\r\n    \"tests/func/parsing/test_resolver.py::test_local_overwrite_error[params.json-params.json]\": 0.34710509999996475,\r\n    \"tests/func/parsing/test_resolver.py::test_local_overwrite_error[vars_0-build.vars[0]]\": 0.14905879999992067,\r\n    \"tests/func/parsing/test_resolver.py::test_local_vars\": 0.19008880000001227,\r\n    \"tests/func/parsing/test_resolver.py::test_no_params_yaml_and_vars\": 0.1554322999999158,\r\n    \"tests/func/parsing/test_resolver.py::test_partial_vars_doesnot_exist\": 0.15263030000005529,\r\n    \"tests/func/parsing/test_resolver.py::test_resolver\": 0.1734412000000134,\r\n    \"tests/func/parsing/test_resolver.py::test_specified_params_file_not_exist\": 0.16217699999998558,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_already_loaded_message[vars_0-False]\": 0.157558999999992,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_already_loaded_message[vars_0-True]\": 0.1264745999999377,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_already_loaded_message[vars_1-False]\": 0.13397820000000138,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_already_loaded_message[vars_1-True]\": 0.15042000000005373,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_already_loaded_message[vars_2-False]\": 0.12980170000003,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_already_loaded_message[vars_2-True]\": 
0.1325290999999993,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_interpolation_errors[${file}_params.yaml]\": 0.20060419999992973,\r\n    \"tests/func/parsing/test_resolver.py::test_vars_interpolation_errors[vars_1]\": 0.18543940000000703,\r\n    \"tests/func/parsing/test_top_level.py::test_artifacts\": 0.14650329999989253,\r\n    \"tests/func/parsing/test_top_level.py::test_datasets\": 0.13541109999994205,\r\n    \"tests/func/parsing/test_top_level.py::test_metrics\": 0.14192579999996724,\r\n    \"tests/func/parsing/test_top_level.py::test_params\": 0.1397473999999761,\r\n    \"tests/func/parsing/test_top_level.py::test_plots\": 0.17080190000001494,\r\n    \"tests/func/plots/test_collect.py::test_subdir_config_not_overwritten_by_parents\": 0.3654680000000212,\r\n    \"tests/func/plots/test_diff.py::test_diff_dirty\": 1.4312219999999911,\r\n    \"tests/func/plots/test_diff.py::test_no_commits\": 0.35085469999995667,\r\n    \"tests/func/plots/test_modify.py::test_dir_plots\": 0.47324759999997923,\r\n    \"tests/func/plots/test_modify.py::test_plots_modify_existing_template\": 0.3573830000000271,\r\n    \"tests/func/plots/test_modify.py::test_plots_modify_not_existing_template\": 0.14442609999997558,\r\n    \"tests/func/plots/test_modify.py::test_plots_modify_should_not_change_lockfile\": 0.34480729999995674,\r\n    \"tests/func/plots/test_modify.py::test_unset_nonexistent\": 0.3391591000000176,\r\n    \"tests/func/plots/test_show.py::test_collect_non_existing_dir\": 0.7527599999999666,\r\n    \"tests/func/plots/test_show.py::test_dir_plots\": 0.49907970000003843,\r\n    \"tests/func/plots/test_show.py::test_ignore_parsing_error\": 0.4091096999999877,\r\n    \"tests/func/plots/test_show.py::test_log_errors[dvc.yaml-path_kwargs0]\": 0.5107886000000121,\r\n    \"tests/func/plots/test_show.py::test_log_errors[plot.yaml-path_kwargs1]\": 0.7021611999999777,\r\n    \"tests/func/plots/test_show.py::test_plot_cache_missing\": 0.9331480999999826,\r\n    
\"tests/func/plots/test_show.py::test_plot_wrong_metric_type\": 0.5906494999999836,\r\n    \"tests/func/plots/test_show.py::test_plots_binary[jpg]\": 0.7359160999999972,\r\n    \"tests/func/plots/test_show.py::test_plots_binary[svg]\": 0.8953023000000258,\r\n    \"tests/func/plots/test_show.py::test_plots_show_nested_x_dict\": 0.28097090000005664,\r\n    \"tests/func/plots/test_show.py::test_plots_show_non_existing\": 0.2934837999999331,\r\n    \"tests/func/plots/test_show.py::test_plots_show_overlap[False]\": 0.4044963000000621,\r\n    \"tests/func/plots/test_show.py::test_plots_show_overlap[True]\": 0.5023849000000382,\r\n    \"tests/func/plots/test_show.py::test_show_from_subdir\": 0.36088259999991124,\r\n    \"tests/func/plots/test_show.py::test_show_non_plot[False]\": 0.15651450000001432,\r\n    \"tests/func/plots/test_show.py::test_show_non_plot[True]\": 0.37671339999997144,\r\n    \"tests/func/plots/test_show.py::test_show_non_plot_and_plot_with_params\": 0.708958100000018,\r\n    \"tests/func/plots/test_show.py::test_show_plots_defined_with_native_os_path\": 0.3919658999999456,\r\n    \"tests/func/plots/test_show.py::test_show_targets\": 0.20834130000002915,\r\n    \"tests/func/plots/test_show.py::test_top_level_parametrized[${data1}-expanded_config2-expected_datafiles2]\": 0.21378269999996746,\r\n    \"tests/func/plots/test_show.py::test_top_level_parametrized[plot_config0-expanded_config0-expected_datafiles0]\": 0.2116988999999876,\r\n    \"tests/func/plots/test_show.py::test_top_level_parametrized[plot_config1-expanded_config1-expected_datafiles1]\": 0.2667495999999687,\r\n    \"tests/func/plots/test_show.py::test_top_level_plots[plot_config0-expected_datafiles0]\": 0.22352249999994456,\r\n    \"tests/func/plots/test_show.py::test_top_level_plots[plot_config1-expected_datafiles1]\": 0.24122239999996964,\r\n    \"tests/func/plots/test_show.py::test_top_level_plots[plot_config2-expected_datafiles2]\": 0.21663869999997587,\r\n    
\"tests/func/repro/test_repro.py::TestReproAlreadyCached::test\": 0.44233299999996234,\r\n    \"tests/func/repro/test_repro.py::TestReproAlreadyCached::test_force_import\": 0.8097182000000203,\r\n    \"tests/func/repro/test_repro.py::TestReproAlreadyCached::test_force_with_dependencies\": 0.6607293999999797,\r\n    \"tests/func/repro/test_repro.py::test_cmd_repro\": 1.0095081999999707,\r\n    \"tests/func/repro/test_repro.py::test_cyclic_graph_error\": 0.9080728000000136,\r\n    \"tests/func/repro/test_repro.py::test_downstream\": 2.411942099999976,\r\n    \"tests/func/repro/test_repro.py::test_freeze_non_existing[Dvcfile:name]\": 0.22750590000003967,\r\n    \"tests/func/repro/test_repro.py::test_freeze_non_existing[Dvcfile]\": 0.25325349999997115,\r\n    \"tests/func/repro/test_repro.py::test_freeze_non_existing[not-existing-stage.json]\": 0.2027564999999072,\r\n    \"tests/func/repro/test_repro.py::test_freeze_non_existing[pipelines.yaml:name]\": 0.24169990000001462,\r\n    \"tests/func/repro/test_repro.py::test_freeze_non_existing[pipelines.yaml]\": 0.24990230000003066,\r\n    \"tests/func/repro/test_repro.py::test_freeze_non_existing[stage.dvc:name]\": 0.18956570000000283,\r\n    \"tests/func/repro/test_repro.py::test_freeze_non_existing[stage.dvc]\": 0.19225720000002866,\r\n    \"tests/func/repro/test_repro.py::test_non_existing_output\": 0.42798149999998714,\r\n    \"tests/func/repro/test_repro.py::test_non_existing_stage_name\": 0.40296299999999974,\r\n    \"tests/func/repro/test_repro.py::test_repro_all_pipelines\": 0.8409965000000739,\r\n    \"tests/func/repro/test_repro.py::test_repro_allow_missing\": 0.5695142999999803,\r\n    \"tests/func/repro/test_repro.py::test_repro_allow_missing_and_pull\": 1.1501956999999834,\r\n    \"tests/func/repro/test_repro.py::test_repro_changed_code\": 0.6990531999999803,\r\n    \"tests/func/repro/test_repro.py::test_repro_changed_data\": 0.8488271000000509,\r\n    
\"tests/func/repro/test_repro.py::test_repro_changed_deep_data\": 1.1420626999999968,\r\n    \"tests/func/repro/test_repro.py::test_repro_changed_dir\": 0.6733219999999847,\r\n    \"tests/func/repro/test_repro.py::test_repro_changed_dir_data\": 1.1888351000000057,\r\n    \"tests/func/repro/test_repro.py::test_repro_data_source\": 0.6440561999999659,\r\n    \"tests/func/repro/test_repro.py::test_repro_dep_dir_with_outputs_under_it\": 1.0282210000000305,\r\n    \"tests/func/repro/test_repro.py::test_repro_dep_under_dir\": 0.83027340000001,\r\n    \"tests/func/repro/test_repro.py::test_repro_dry\": 0.7964034000000311,\r\n    \"tests/func/repro/test_repro.py::test_repro_dry_no_exec\": 0.5031210999999303,\r\n    \"tests/func/repro/test_repro.py::test_repro_external_outputs[False]\": 0.6774030999999923,\r\n    \"tests/func/repro/test_repro.py::test_repro_external_outputs[True]\": 0.46038469999996323,\r\n    \"tests/func/repro/test_repro.py::test_repro_fail\": 0.5050530000000322,\r\n    \"tests/func/repro/test_repro.py::test_repro_force\": 0.6815554999999449,\r\n    \"tests/func/repro/test_repro.py::test_repro_force_downstream\": 1.426229400000011,\r\n    \"tests/func/repro/test_repro.py::test_repro_force_downstream_do_not_force_independent_stages\": 1.8231759000000238,\r\n    \"tests/func/repro/test_repro.py::test_repro_frozen\": 1.1989045999999917,\r\n    \"tests/func/repro/test_repro.py::test_repro_frozen_callback\": 0.4590139000000022,\r\n    \"tests/func/repro/test_repro.py::test_repro_frozen_unchanged\": 0.4908328999999867,\r\n    \"tests/func/repro/test_repro.py::test_repro_ignore_errors\": 0.6419171000000006,\r\n    \"tests/func/repro/test_repro.py::test_repro_keep_going\": 0.450359399999968,\r\n    \"tests/func/repro/test_repro.py::test_repro_list_of_commands_in_order[False]\": 0.19824639999995952,\r\n    \"tests/func/repro/test_repro.py::test_repro_list_of_commands_in_order[True]\": 0.20887840000000324,\r\n    
\"tests/func/repro/test_repro.py::test_repro_list_of_commands_raise_and_stops_after_failure[False]\": 0.22823670000002494,\r\n    \"tests/func/repro/test_repro.py::test_repro_list_of_commands_raise_and_stops_after_failure[True]\": 0.18410009999996646,\r\n    \"tests/func/repro/test_repro.py::test_repro_metrics_add_unchanged\": 0.5753306999999381,\r\n    \"tests/func/repro/test_repro.py::test_repro_missing_lock_info\": 0.47467919999996866,\r\n    \"tests/func/repro/test_repro.py::test_repro_multiple_params\": 0.6419725999999741,\r\n    \"tests/func/repro/test_repro.py::test_repro_no_commit\": 0.6478619000000094,\r\n    \"tests/func/repro/test_repro.py::test_repro_phony\": 0.8436360000000036,\r\n    \"tests/func/repro/test_repro.py::test_repro_pipeline\": 1.0502811000000065,\r\n    \"tests/func/repro/test_repro.py::test_repro_pipeline_cli\": 0.6927358000000368,\r\n    \"tests/func/repro/test_repro.py::test_repro_pipelines\": 1.1391434000000231,\r\n    \"tests/func/repro/test_repro.py::test_repro_pipelines_cli\": 1.1502829999999449,\r\n    \"tests/func/repro/test_repro.py::test_repro_pulls_continue_without_run_cache\": 1.0244599999999764,\r\n    \"tests/func/repro/test_repro.py::test_repro_pulls_missing_data_source\": 0.8908059999999978,\r\n    \"tests/func/repro/test_repro.py::test_repro_pulls_missing_import\": 1.45085720000003,\r\n    \"tests/func/repro/test_repro.py::test_repro_rm_recursive\": 0.298509700000011,\r\n    \"tests/func/repro/test_repro.py::test_repro_shell\": 0.0010611999999809996,\r\n    \"tests/func/repro/test_repro.py::test_repro_single_item_with_multiple_targets\": 0.5554508999999825,\r\n    \"tests/func/repro/test_repro.py::test_repro_skip_pull_if_no_run_cache_is_passed\": 0.7956775000000107,\r\n    \"tests/func/repro/test_repro.py::test_repro_up_to_date\": 0.4973491000000081,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_cmd_changes\": 0.579681100000073,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_lockfile_gets_deleted\": 
0.5708748999999784,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_new_deps_added_does_not_exist\": 0.23076729999996815,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_new_deps_is_added_in_dvcfile\": 0.6290411000000518,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_new_deps_is_moved\": 0.6029598999999735,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_new_out_overlaps_others_stage_outs\": 0.2545671999999968,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_new_outs_added_does_not_exist\": 0.3137187999999469,\r\n    \"tests/func/repro/test_repro.py::test_repro_when_new_outs_is_added_in_dvcfile\": 0.5474466999999663,\r\n    \"tests/func/repro/test_repro_allow_missing.py::test_repro_allow_missing\": 0.5116075000000251,\r\n    \"tests/func/repro/test_repro_allow_missing.py::test_repro_allow_missing_and_pull\": 0.8432981000000268,\r\n    \"tests/func/repro/test_repro_allow_missing.py::test_repro_allow_missing_cached\": 0.5020847000000117,\r\n    \"tests/func/repro/test_repro_allow_missing.py::test_repro_allow_missing_upstream_stage_modified\": 1.2886707000000115,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_pull_fails\": 0.7426556000000346,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_pulls_allow_missing[False]\": 0.7117745999999556,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_pulls_allow_missing[True]\": 0.6968059999999809,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_pulls_continue_without_run_cache\": 0.7865740000000301,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_pulls_missing_data_source\": 0.6968352999999183,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_pulls_missing_import\": 1.3220061999999757,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_pulls_persisted_output\": 0.6955814000000373,\r\n    \"tests/func/repro/test_repro_pull.py::test_repro_skip_pull_if_no_run_cache_is_passed\": 1.052970700000003,\r\n    
\"tests/func/repro/test_repro_pull.py::test_repro_skip_pull_if_single_item_is_passed\": 1.0417836000000307,\r\n    \"tests/func/test_add.py::test_add\": 0.22337029999999913,\r\n    \"tests/func/test_add.py::test_add_colon_in_filename\": 0.001040700000032757,\r\n    \"tests/func/test_add.py::test_add_commit\": 0.2579165999999873,\r\n    \"tests/func/test_add.py::test_add_dir_with_existing_cache\": 0.41711939999999004,\r\n    \"tests/func/test_add.py::test_add_directory\": 0.30083650000005946,\r\n    \"tests/func/test_add.py::test_add_directory_with_forward_slash\": 0.31754209999996874,\r\n    \"tests/func/test_add.py::test_add_does_not_remove_stage_file_on_failure[dvc.repo.index.Index.check_graph]\": 0.29538049999996474,\r\n    \"tests/func/test_add.py::test_add_does_not_remove_stage_file_on_failure[dvc.stage.Stage.add_outs]\": 0.28946510000002945,\r\n    \"tests/func/test_add.py::test_add_empty_files[copy]\": 0.2735349999999812,\r\n    \"tests/func/test_add.py::test_add_empty_files[hardlink]\": 0.266218399999957,\r\n    \"tests/func/test_add.py::test_add_empty_files[symlink]\": 0.2584461999999803,\r\n    \"tests/func/test_add.py::test_add_executable\": 0.001299799999969764,\r\n    \"tests/func/test_add.py::test_add_file_in_dir\": 0.22947199999998702,\r\n    \"tests/func/test_add.py::test_add_file_in_symlink_dir\": 0.13436689999997498,\r\n    \"tests/func/test_add.py::test_add_filtered_files_in_dir[dir\\\\\\\\**\\\\\\\\subdata*-expected_def_paths3-expected_rel_paths3]\": 0.3367860999999266,\r\n    \"tests/func/test_add.py::test_add_filtered_files_in_dir[dir\\\\\\\\subdir\\\\\\\\?subdata-expected_def_paths1-expected_rel_paths1]\": 0.3288568000000396,\r\n    \"tests/func/test_add.py::test_add_filtered_files_in_dir[dir\\\\\\\\subdir\\\\\\\\[aiou]subdata-expected_def_paths2-expected_rel_paths2]\": 0.21213800000003857,\r\n    \"tests/func/test_add.py::test_add_filtered_files_in_dir[dir\\\\\\\\subdir\\\\\\\\subdata*-expected_def_paths0-expected_rel_paths0]\": 
0.30685490000007576,\r\n    \"tests/func/test_add.py::test_add_force_overwrite_out\": 0.2808322999999291,\r\n    \"tests/func/test_add.py::test_add_from_data_dir\": 0.3940832000000114,\r\n    \"tests/func/test_add.py::test_add_ignored\": 0.24244040000002087,\r\n    \"tests/func/test_add.py::test_add_long_fname\": 0.0010275999999862506,\r\n    \"tests/func/test_add.py::test_add_modified_dir\": 0.38431979999995747,\r\n    \"tests/func/test_add.py::test_add_on_not_existing_file_should_not_remove_stage_file\": 0.2649236999999971,\r\n    \"tests/func/test_add.py::test_add_optimization_for_hardlink_on_empty_files\": 0.39760729999989053,\r\n    \"tests/func/test_add.py::test_add_parent_dir\": 0.34646930000002385,\r\n    \"tests/func/test_add.py::test_add_pipeline_file\": 0.3983269999999379,\r\n    \"tests/func/test_add.py::test_add_preserve_fields\": 0.2187667000000033,\r\n    \"tests/func/test_add.py::test_add_symlink_dir\": 0.1593280000000732,\r\n    \"tests/func/test_add.py::test_add_symlink_file\": 0.2915504000000624,\r\n    \"tests/func/test_add.py::test_add_to_cache_different_name\": 0.3470150999999646,\r\n    \"tests/func/test_add.py::test_add_to_cache_dir\": 0.3618119000000206,\r\n    \"tests/func/test_add.py::test_add_to_cache_file\": 0.3751513000000273,\r\n    \"tests/func/test_add.py::test_add_to_cache_from_remote\": 0.4571867000000225,\r\n    \"tests/func/test_add.py::test_add_to_cache_not_exists\": 0.26145719999993844,\r\n    \"tests/func/test_add.py::test_add_to_remote_absolute\": 0.4374439000000052,\r\n    \"tests/func/test_add.py::test_add_tracked_file\": 0.4206479999999715,\r\n    \"tests/func/test_add.py::test_add_unicode\": 0.3307029000000625,\r\n    \"tests/func/test_add.py::test_add_unprotected\": 0.28614760000004935,\r\n    \"tests/func/test_add.py::test_add_unsupported_file\": 0.23960789999995313,\r\n    \"tests/func/test_add.py::test_add_updates_to_cloud_versioning_dir\": 0.27616580000005797,\r\n    
\"tests/func/test_add.py::test_add_with_cache_link_error\": 0.18520460000002004,\r\n    \"tests/func/test_add.py::test_add_with_out\": 0.24095520000003035,\r\n    \"tests/func/test_add.py::test_cmd_add\": 0.24542709999997214,\r\n    \"tests/func/test_add.py::test_double_add_unchanged_dir\": 0.28557270000004564,\r\n    \"tests/func/test_add.py::test_double_add_unchanged_file\": 0.26062300000006644,\r\n    \"tests/func/test_add.py::test_escape_gitignore_entries\": 0.24416680000001634,\r\n    \"tests/func/test_add.py::test_failed_add_cleanup\": 0.31850170000006983,\r\n    \"tests/func/test_add.py::test_not_raises_on_re_add\": 0.31634130000003324,\r\n    \"tests/func/test_add.py::test_readding_dir_should_not_unprotect_all\": 0.2872294000000011,\r\n    \"tests/func/test_add.py::test_should_collect_dir_cache_only_once\": 0.27603559999988647,\r\n    \"tests/func/test_add.py::test_should_not_checkout_when_adding_cached_copy\": 0.2916169000000082,\r\n    \"tests/func/test_add.py::test_should_place_stage_in_data_dir_if_repository_below_symlink\": 0.21484020000002602,\r\n    \"tests/func/test_add.py::test_should_protect_on_repeated_add[copy]\": 0.270738999999935,\r\n    \"tests/func/test_add.py::test_should_protect_on_repeated_add[hardlink]\": 0.2855639999999653,\r\n    \"tests/func/test_add.py::test_should_protect_on_repeated_add[symlink]\": 0.2652501000000598,\r\n    \"tests/func/test_add.py::test_should_relink_on_repeated_add[copy-hardlink-is_hardlink]\": 0.3022183999999015,\r\n    \"tests/func/test_add.py::test_should_relink_on_repeated_add[copy-symlink-is_symlink]\": 0.30608630000000403,\r\n    \"tests/func/test_add.py::test_should_relink_on_repeated_add[hardlink-copy-<lambda>]\": 0.32222020000000384,\r\n    \"tests/func/test_add.py::test_should_relink_on_repeated_add[symlink-copy-<lambda>]\": 0.35585129999998344,\r\n    \"tests/func/test_add.py::test_should_throw_proper_exception_on_corrupted_stage_file\": 0.2975190000000225,\r\n    
\"tests/func/test_add.py::test_should_throw_proper_exception_on_existing_out\": 0.24529200000006313,\r\n    \"tests/func/test_add.py::test_should_update_state_entry_for_directory_after_add\": 0.44023219999996854,\r\n    \"tests/func/test_add.py::test_should_update_state_entry_for_file_after_add\": 0.34753579999988915,\r\n    \"tests/func/test_add.py::test_try_adding_multiple_overlaps\": 0.2431571999999278,\r\n    \"tests/func/test_add.py::test_try_adding_pipeline_tracked_output\": 0.47223850000005996,\r\n    \"tests/func/test_add.py::test_windows_should_add_when_cache_on_different_drive\": 0.201824600000009,\r\n    \"tests/func/test_analytics.py::test_collect_and_send_report\": 0.2581531999999811,\r\n    \"tests/func/test_analytics.py::test_daemon_analytics\": 0.009375399999953515,\r\n    \"tests/func/test_analytics.py::test_main_analytics\": 0.21399900000000116,\r\n    \"tests/func/test_analytics.py::test_scm_dvc_only\": 0.14955400000002328,\r\n    \"tests/func/test_analytics.py::test_scm_git\": 0.19675599999993665,\r\n    \"tests/func/test_analytics.py::test_scm_subrepo\": 0.34907369999996263,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore[ignored-0-True]\": 0.21529790000005278,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore[not_ignored-1-False]\": 0.18026370000006864,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_default_dir\": 0.26343130000009296,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_details[dir\\\\\\\\foobar-0-dir\\\\\\\\.dvcignore:1:foobar\\\\tdir\\\\\\\\foobar\\\\n]\": 0.1903800000000615,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_details[file-0-.dvcignore:1:f*\\\\tfile\\\\n]\": 0.18237559999994346,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_details[foo-0-.dvcignore:2:!foo\\\\tfoo\\\\n]\": 0.1908862999999883,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_details_all\": 0.24732430000005934,\r\n    
\"tests/func/test_check_ignore.py::test_check_ignore_dir[path0-0]\": 0.23948219999999765,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_dir[path1-1]\": 0.1965277999998989,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_error_args_cases[args0]\": 0.19047829999999522,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_error_args_cases[args1]\": 0.19249660000008362,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_error_args_cases[args2]\": 0.1935739000000467,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_error_args_cases[args3]\": 0.17713900000006788,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_error_args_cases[args4]\": 0.18244690000005903,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_non_matching[False]\": 0.20048250000007783,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_non_matching[True]\": 0.1996592000000419,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_out_side_repo\": 0.2362817000001769,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_stdin_mode[ignored-0-True]\": 0.2505042999999887,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_stdin_mode[not_ignored-1-False]\": 0.2630323999999291,\r\n    \"tests/func/test_check_ignore.py::test_check_ignore_sub_repo\": 0.2633756000000176,\r\n    \"tests/func/test_check_ignore.py::test_check_sub_dir_ignore_file\": 0.2596231999999645,\r\n    \"tests/func/test_checkout.py::TestCheckoutCleanWorkingDir::test\": 0.38110630000005585,\r\n    \"tests/func/test_checkout.py::TestCheckoutCleanWorkingDir::test_force\": 0.3655913000000055,\r\n    \"tests/func/test_checkout.py::test_checkout\": 0.6012491000000182,\r\n    \"tests/func/test_checkout.py::test_checkout_cli\": 0.8530119000000695,\r\n    \"tests/func/test_checkout.py::test_checkout_dir_compat\": 0.30509909999989304,\r\n    \"tests/func/test_checkout.py::test_checkout_directory\": 0.39772500000003674,\r\n    
\"tests/func/test_checkout.py::test_checkout_empty_dir\": 0.32532400000002326,\r\n    \"tests/func/test_checkout.py::test_checkout_executable\": 0.2870113000000174,\r\n    \"tests/func/test_checkout.py::test_checkout_file\": 0.2947092000000566,\r\n    \"tests/func/test_checkout.py::test_checkout_missing_md5_in_lock_file_for_outs_deps\": 0.44046830000002046,\r\n    \"tests/func/test_checkout.py::test_checkout_moved_cache_dir_with_symlinks\": 0.4461985000000368,\r\n    \"tests/func/test_checkout.py::test_checkout_no_checksum\": 0.23846259999993435,\r\n    \"tests/func/test_checkout.py::test_checkout_not_cached_file\": 0.35285209999983635,\r\n    \"tests/func/test_checkout.py::test_checkout_partial\": 0.406533000000195,\r\n    \"tests/func/test_checkout.py::test_checkout_partial_subdir\": 0.3881925999999112,\r\n    \"tests/func/test_checkout.py::test_checkout_partial_unchanged\": 0.5505855999999767,\r\n    \"tests/func/test_checkout.py::test_checkout_recursive\": 0.3562659999998914,\r\n    \"tests/func/test_checkout.py::test_checkout_recursive_not_directory\": 0.27791689999992286,\r\n    \"tests/func/test_checkout.py::test_checkout_relink[hardlink-is_hardlink]\": 0.3497247000000243,\r\n    \"tests/func/test_checkout.py::test_checkout_relink[symlink-is_symlink]\": 0.3419618000000355,\r\n    \"tests/func/test_checkout.py::test_checkout_selective_remove\": 0.39663129999996727,\r\n    \"tests/func/test_checkout.py::test_checkout_stats_on_failure\": 0.8075246000000789,\r\n    \"tests/func/test_checkout.py::test_checkout_suggest_git\": 0.1943472999998903,\r\n    \"tests/func/test_checkout.py::test_checkout_target_recursive_should_not_remove_other_used_files\": 0.4229938000000857,\r\n    \"tests/func/test_checkout.py::test_checkout_with_deps\": 0.5434178000000429,\r\n    \"tests/func/test_checkout.py::test_checkout_with_deps_cli\": 0.6640707999999904,\r\n    \"tests/func/test_checkout.py::test_checkout_with_relink_existing[copy]\": 0.4169368000000304,\r\n    
\"tests/func/test_checkout.py::test_checkout_with_relink_existing[hardlink]\": 0.36636650000002646,\r\n    \"tests/func/test_checkout.py::test_checkout_with_relink_existing[symlink]\": 0.3817441000001054,\r\n    \"tests/func/test_checkout.py::test_checkouts_for_pipeline_tracked_outs\": 1.2298499999999422,\r\n    \"tests/func/test_checkout.py::test_checkouts_on_same_stage_name_and_output_name\": 0.7647766000000047,\r\n    \"tests/func/test_checkout.py::test_checkouts_with_different_addressing\": 1.2168963999998823,\r\n    \"tests/func/test_checkout.py::test_gitignore_basic\": 0.6002985999999737,\r\n    \"tests/func/test_checkout.py::test_gitignore_when_checkout\": 1.270755199999826,\r\n    \"tests/func/test_checkout.py::test_partial_checkout[dir\\\\\\\\subdir\\\\\\\\file]\": 0.4440303999999742,\r\n    \"tests/func/test_checkout.py::test_partial_checkout[dir\\\\\\\\subdir]\": 0.44663820000005217,\r\n    \"tests/func/test_checkout.py::test_remove_files_when_checkout\": 0.5229241000000684,\r\n    \"tests/func/test_checkout.py::test_stats_does_not_show_changes_by_default\": 0.5245118999999931,\r\n    \"tests/func/test_checkout.py::test_stats_on_added_file_from_tracked_dir\": 0.7492703999998866,\r\n    \"tests/func/test_checkout.py::test_stats_on_checkout\": 1.7117934000000332,\r\n    \"tests/func/test_checkout.py::test_stats_on_empty_checkout\": 0.661128299999973,\r\n    \"tests/func/test_checkout.py::test_stats_on_removed_file_from_tracked_dir\": 0.9060216999999966,\r\n    \"tests/func/test_checkout.py::test_stats_on_show_changes_does_not_show_summary\": 0.5733001999999487,\r\n    \"tests/func/test_checkout.py::test_stats_on_updated_file_from_tracked_dir\": 0.7445966999999882,\r\n    \"tests/func/test_cli.py::test_add\": 0.16391589999989264,\r\n    \"tests/func/test_cli.py::test_argparse\": 0.11586220000015146,\r\n    \"tests/func/test_cli.py::test_cd\": 0.16199100000005728,\r\n    \"tests/func/test_cli.py::test_checkout\": 0.23483790000000226,\r\n    
\"tests/func/test_cli.py::test_config_list\": 0.0035698999998885483,\r\n    \"tests/func/test_cli.py::test_config_unset\": 0.11844689999998081,\r\n    \"tests/func/test_cli.py::test_find_root\": 0.1724064999999655,\r\n    \"tests/func/test_cli.py::test_pull\": 0.1517998000000489,\r\n    \"tests/func/test_cli.py::test_push\": 0.1573064000000386,\r\n    \"tests/func/test_cli.py::test_remove\": 0.180031299999996,\r\n    \"tests/func/test_cli.py::test_repro\": 0.15573790000007648,\r\n    \"tests/func/test_cli.py::test_status\": 0.14812940000001618,\r\n    \"tests/func/test_cli.py::test_unknown_command_help\": 0.007515200000057121,\r\n    \"tests/func/test_cli.py::test_unknown_subcommand_help\": 0.0067342999999482345,\r\n    \"tests/func/test_commit.py::test_commit_changed_md5\": 0.2195017999999891,\r\n    \"tests/func/test_commit.py::test_commit_dos2unix\": 0.3207045999999991,\r\n    \"tests/func/test_commit.py::test_commit_force\": 0.3498164000001225,\r\n    \"tests/func/test_commit.py::test_commit_granular_dir\": 0.3889625999998998,\r\n    \"tests/func/test_commit.py::test_commit_granular_output\": 0.3346500999999762,\r\n    \"tests/func/test_commit.py::test_commit_granular_output_dir\": 0.255196200000114,\r\n    \"tests/func/test_commit.py::test_commit_granular_output_file\": 0.22752040000000306,\r\n    \"tests/func/test_commit.py::test_commit_no_exec\": 0.2550777000000153,\r\n    \"tests/func/test_commit.py::test_commit_no_exec_missing_dep\": 0.1423850000001039,\r\n    \"tests/func/test_commit.py::test_commit_no_exec_missing_out\": 0.17701290000002246,\r\n    \"tests/func/test_commit.py::test_commit_pipeline_stage\": 0.6073219999999537,\r\n    \"tests/func/test_commit.py::test_commit_preserve_fields\": 0.30999370000006365,\r\n    \"tests/func/test_commit.py::test_commit_recursive\": 0.3930149000000256,\r\n    \"tests/func/test_commit.py::test_commit_updates_to_cloud_versioning_dir\": 0.2628995999999688,\r\n    \"tests/func/test_commit.py::test_commit_with_deps\": 
0.4309333000001061,\r\n    \"tests/func/test_commit.py::test_imported_entries_unchanged\": 0.8517232999998896,\r\n    \"tests/func/test_config.py::test_config_gdrive_fields\": 0.12703910000016094,\r\n    \"tests/func/test_config.py::test_config_get[args0-0-False]\": 0.20948869999995168,\r\n    \"tests/func/test_config.py::test_config_get[args1-0-myremote]\": 0.1829517000001033,\r\n    \"tests/func/test_config.py::test_config_get[args2-0-treeverse]\": 0.19686669999998685,\r\n    \"tests/func/test_config.py::test_config_get[args3-0-treeverse]\": 0.1675059000000374,\r\n    \"tests/func/test_config.py::test_config_get[args4-251-option 'profile' doesn't exist]\": 0.18096200000002227,\r\n    \"tests/func/test_config.py::test_config_get[args5-0-gs://bucket/path]\": 0.12368970000022728,\r\n    \"tests/func/test_config.py::test_config_get[args6-0-gs://bucket/path]\": 0.13007389999995667,\r\n    \"tests/func/test_config.py::test_config_get[args7-251-remote 'other' doesn't exist]\": 0.13832799999988765,\r\n    \"tests/func/test_config.py::test_config_get_in_non_dvc_repo[args0-251]\": 0.014465299999983472,\r\n    \"tests/func/test_config.py::test_config_get_in_non_dvc_repo[args1-251]\": 0.014442199999962213,\r\n    \"tests/func/test_config.py::test_config_get_in_non_dvc_repo[args2-0]\": 0.013205699999957687,\r\n    \"tests/func/test_config.py::test_config_list\": 0.13482399999986683,\r\n    \"tests/func/test_config.py::test_config_list_in_non_dvc_repo[args0-251]\": 0.019574000000034175,\r\n    \"tests/func/test_config.py::test_config_list_in_non_dvc_repo[args1-251]\": 0.01389150000011341,\r\n    \"tests/func/test_config.py::test_config_list_in_non_dvc_repo[args2-0]\": 0.013700800000037816,\r\n    \"tests/func/test_config.py::test_config_loads_without_error_for_non_dvc_repo\": 0.013231799999971372,\r\n    \"tests/func/test_config.py::test_config_remote\": 0.15216689999999744,\r\n    \"tests/func/test_config.py::test_config_set\": 0.17704680000008466,\r\n    
\"tests/func/test_config.py::test_config_set_in_non_dvc_repo\": 0.014170100000114871,\r\n    \"tests/func/test_config.py::test_config_set_local\": 0.20866990000001806,\r\n    \"tests/func/test_config.py::test_config_show_origin_merged\": 0.14643520000004173,\r\n    \"tests/func/test_config.py::test_config_show_origin_single\": 0.15067039999996723,\r\n    \"tests/func/test_config.py::test_list_bad_args[args0]\": 0.12268940000012662,\r\n    \"tests/func/test_config.py::test_list_bad_args[args1]\": 0.14213120000010804,\r\n    \"tests/func/test_config.py::test_list_bad_args[args2]\": 0.14091080000002876,\r\n    \"tests/func/test_config.py::test_load_relative_paths[cert_path-webdavs://example.com/files/USERNAME/]\": 0.1395637999999053,\r\n    \"tests/func/test_config.py::test_load_relative_paths[credentialpath-gs://my-bucket/path]\": 0.11936819999993986,\r\n    \"tests/func/test_config.py::test_load_relative_paths[credentialpath-s3://mybucket/my/path]\": 0.12908759999993435,\r\n    \"tests/func/test_config.py::test_load_relative_paths[gdrive_service_account_json_file_path-gdrive://root/test]\": 0.1351872000000185,\r\n    \"tests/func/test_config.py::test_load_relative_paths[gdrive_user_credentials_file-gdrive://root/test]\": 0.12179250000008324,\r\n    \"tests/func/test_config.py::test_load_relative_paths[key_path-webdavs://example.com/files/USERNAME/]\": 0.14102509999997892,\r\n    \"tests/func/test_config.py::test_load_relative_paths[keyfile-ssh://user@example.com:1234/path/to/dir]\": 0.14036539999995057,\r\n    \"tests/func/test_config.py::test_merging_two_levels\": 0.14326260000007096,\r\n    \"tests/func/test_config.py::test_set_invalid_key\": 0.12056800000004841,\r\n    \"tests/func/test_daemon.py::test_analytics\": 1.1433796000001166,\r\n    \"tests/func/test_daemon.py::test_updater\": 2.4364567999999736,\r\n    \"tests/func/test_data_cloud.py::TestRemote::test\": 0.6962589999999409,\r\n    \"tests/func/test_data_cloud.py::TestRemote::test_pull_00_prefix\": 
0.6842130000001134,\r\n    \"tests/func/test_data_cloud.py::TestRemote::test_pull_no_00_prefix\": 0.8140448000000333,\r\n    \"tests/func/test_data_cloud.py::TestRemote::test_stage_cache_push_pull\": 0.57795590000012,\r\n    \"tests/func/test_data_cloud.py::test_cloud_cli\": 1.659253900000067,\r\n    \"tests/func/test_data_cloud.py::test_data_cloud_error_cli\": 0.22858320000000276,\r\n    \"tests/func/test_data_cloud.py::test_dvc_pull_pipeline_stages\": 3.110578300000043,\r\n    \"tests/func/test_data_cloud.py::test_fetch_stats[fs0-2 files fetched]\": 0.6439170999998396,\r\n    \"tests/func/test_data_cloud.py::test_fetch_stats[fs1-1 file fetched]\": 0.511205700000005,\r\n    \"tests/func/test_data_cloud.py::test_fetch_stats[fs2-Everything is up to date.]\": 0.26188230000002477,\r\n    \"tests/func/test_data_cloud.py::test_hash_recalculation\": 0.3639411999999993,\r\n    \"tests/func/test_data_cloud.py::test_missing_cache\": 0.4893174999999701,\r\n    \"tests/func/test_data_cloud.py::test_output_remote\": 1.5091685999999527,\r\n    \"tests/func/test_data_cloud.py::test_output_target_remote\": 1.2829474000000118,\r\n    \"tests/func/test_data_cloud.py::test_pipeline_file_target_ops\": 1.7815956999999116,\r\n    \"tests/func/test_data_cloud.py::test_pull_allow_missing\": 0.6819165000000567,\r\n    \"tests/func/test_data_cloud.py::test_pull_external_dvc_imports\": 1.9060864000000493,\r\n    \"tests/func/test_data_cloud.py::test_pull_external_dvc_imports_mixed\": 1.6527059000000008,\r\n    \"tests/func/test_data_cloud.py::test_pull_git_imports[erepo_dir]\": 1.5395055000000184,\r\n    \"tests/func/test_data_cloud.py::test_pull_git_imports[git_dir]\": 1.045524300000011,\r\n    \"tests/func/test_data_cloud.py::test_pull_granular_excluding_import_that_cannot_be_pulled\": 0.7391129000000092,\r\n    \"tests/func/test_data_cloud.py::test_pull_partial\": 0.8865887999999131,\r\n    \"tests/func/test_data_cloud.py::test_pull_partial_import\": 0.34923349999996844,\r\n    
\"tests/func/test_data_cloud.py::test_pull_partial_import_missing\": 0.25161370000012084,\r\n    \"tests/func/test_data_cloud.py::test_pull_partial_import_modified\": 0.3184686000000738,\r\n    \"tests/func/test_data_cloud.py::test_pull_stats\": 0.8137879000000794,\r\n    \"tests/func/test_data_cloud.py::test_push_pull_all[all_branches-3]\": 1.3515395000001718,\r\n    \"tests/func/test_data_cloud.py::test_push_pull_all[all_commits-3]\": 2.8969190000000253,\r\n    \"tests/func/test_data_cloud.py::test_push_pull_all[all_tags-2]\": 1.2282873999999993,\r\n    \"tests/func/test_data_cloud.py::test_push_pull_fetch_pipeline_stages\": 0.9839677999999594,\r\n    \"tests/func/test_data_cloud.py::test_push_stats[fs0-2 files pushed]\": 0.5599261000000979,\r\n    \"tests/func/test_data_cloud.py::test_push_stats[fs1-1 file pushed]\": 0.6247941999999966,\r\n    \"tests/func/test_data_cloud.py::test_push_stats[fs2-Everything is up to date]\": 0.35563230000002477,\r\n    \"tests/func/test_data_cloud.py::test_target_remote\": 0.8110535999999229,\r\n    \"tests/func/test_data_cloud.py::test_verify_hashes\": 1.1686573000000635,\r\n    \"tests/func/test_data_cloud.py::test_warn_on_outdated_stage\": 0.4580950000000712,\r\n    \"tests/func/test_data_status.py::test_directory\": 0.7879010000000335,\r\n    \"tests/func/test_data_status.py::test_empty_dir\": 0.4069728000000623,\r\n    \"tests/func/test_data_status.py::test_file\": 0.6691374999999198,\r\n    \"tests/func/test_data_status.py::test_git_committed_missing_cache_missing_workspace\": 0.8985119999998687,\r\n    \"tests/func/test_data_status.py::test_git_committed_missing_cache_workspace_exists\": 0.6913467999999057,\r\n    \"tests/func/test_data_status.py::test_git_to_dvc_path_wdir_transformation[None]\": 0.3991748999998208,\r\n    \"tests/func/test_data_status.py::test_git_to_dvc_path_wdir_transformation[path1]\": 0.5411543999999822,\r\n    \"tests/func/test_data_status.py::test_missing_cache_missing_workspace\": 
0.593168799999944,\r\n    \"tests/func/test_data_status.py::test_missing_cache_workspace_exists\": 0.5601576999999907,\r\n    \"tests/func/test_data_status.py::test_missing_dir_object_from_head\": 0.669361800000047,\r\n    \"tests/func/test_data_status.py::test_missing_dir_object_from_index\": 0.7362090000000308,\r\n    \"tests/func/test_data_status.py::test_missing_remote_cache\": 0.9769824999999628,\r\n    \"tests/func/test_data_status.py::test_new_empty_git_repo\": 0.39809409999998024,\r\n    \"tests/func/test_data_status.py::test_noscm_repo\": 0.21185890000003837,\r\n    \"tests/func/test_data_status.py::test_outs_with_no_hashes\": 0.43225139999992734,\r\n    \"tests/func/test_data_status.py::test_outs_with_no_hashes_and_with_uncommitted_files\": 0.40048109999997905,\r\n    \"tests/func/test_data_status.py::test_partial_missing_cache\": 0.49934910000001764,\r\n    \"tests/func/test_data_status.py::test_root_from_dir_to_file\": 0.6534549999998944,\r\n    \"tests/func/test_data_status.py::test_root_from_file_to_dir\": 0.46668609999994715,\r\n    \"tests/func/test_data_status.py::test_skip_uncached_pipeline_outputs\": 0.40500140000006013,\r\n    \"tests/func/test_data_status.py::test_subdir\": 1.0634731000000102,\r\n    \"tests/func/test_data_status.py::test_tracked_directory_deep\": 0.6695379000001367,\r\n    \"tests/func/test_data_status.py::test_unchanged\": 0.7081071999999722,\r\n    \"tests/func/test_data_status.py::test_untracked_newly_added_files\": 0.4063917999999376,\r\n    \"tests/func/test_dataset.py::test_collect\": 0.1674499000000651,\r\n    \"tests/func/test_dataset.py::test_dvc\": 0.46001330000012786,\r\n    \"tests/func/test_dataset.py::test_dvc_dataset_pipeline\": 0.7948393000000351,\r\n    \"tests/func/test_dataset.py::test_dvc_dump\": 0.17996560000005957,\r\n    \"tests/func/test_dataset.py::test_datachain\": 0.22482930000001033,\r\n    \"tests/func/test_dataset.py::test_datachain_dataset_pipeline\": 0.43405649999999696,\r\n    
\"tests/func/test_dataset.py::test_datachain_dump\": 0.1858166000000665,\r\n    \"tests/func/test_dataset.py::test_invalidation\": 0.16291839999996682,\r\n    \"tests/func/test_dataset.py::test_parametrized\": 0.19277699999997822,\r\n    \"tests/func/test_dataset.py::test_pipeline_when_not_in_sync\": 0.2063928999999689,\r\n    \"tests/func/test_dataset.py::test_url\": 1.1613198999999668,\r\n    \"tests/func/test_dataset.py::test_url_dataset_pipeline\": 0.7589528000000882,\r\n    \"tests/func/test_dataset.py::test_url_dump\": 0.16424820000008822,\r\n    \"tests/func/test_diff.py::test_abs_target\": 0.4775963000000729,\r\n    \"tests/func/test_diff.py::test_added\": 0.4151034000000209,\r\n    \"tests/func/test_diff.py::test_added_deep\": 0.46422310000002653,\r\n    \"tests/func/test_diff.py::test_deleted[False]\": 0.683859100000177,\r\n    \"tests/func/test_diff.py::test_deleted[True]\": 0.4685372000000143,\r\n    \"tests/func/test_diff.py::test_diff_add_similar_files[False]\": 0.6157927000000427,\r\n    \"tests/func/test_diff.py::test_diff_add_similar_files[True]\": 0.6788806999999224,\r\n    \"tests/func/test_diff.py::test_diff_dirty\": 0.6926497000000609,\r\n    \"tests/func/test_diff.py::test_diff_granular\": 0.8422219000000268,\r\n    \"tests/func/test_diff.py::test_diff_no_cache\": 0.6685171999999966,\r\n    \"tests/func/test_diff.py::test_diff_rename_file[False]\": 0.7659852000000456,\r\n    \"tests/func/test_diff.py::test_diff_rename_file[True]\": 0.822601000000077,\r\n    \"tests/func/test_diff.py::test_diff_rename_folder[False]\": 0.5709835999999768,\r\n    \"tests/func/test_diff.py::test_diff_rename_folder[True]\": 0.6485969999998815,\r\n    \"tests/func/test_diff.py::test_directories\": 1.1286296000000675,\r\n    \"tests/func/test_diff.py::test_modified\": 0.6859245999999075,\r\n    \"tests/func/test_diff.py::test_modified_subrepo\": 0.9839555999999448,\r\n    \"tests/func/test_diff.py::test_no_cache_entry\": 0.7368651000000455,\r\n    
\"tests/func/test_diff.py::test_no_changes\": 0.6226969000000508,\r\n    \"tests/func/test_diff.py::test_no_commits\": 0.5326039999998784,\r\n    \"tests/func/test_diff.py::test_no_scm\": 0.253040399999918,\r\n    \"tests/func/test_diff.py::test_refs\": 0.9594080000000531,\r\n    \"tests/func/test_diff.py::test_rename_multiple_files_same_hashes\": 0.9475248000001102,\r\n    \"tests/func/test_diff.py::test_same_rev\": 0.5410654000000932,\r\n    \"tests/func/test_diff.py::test_targets_file_and_dir\": 0.9251517999999805,\r\n    \"tests/func/test_diff.py::test_targets_missing_path\": 0.7832548999999744,\r\n    \"tests/func/test_diff.py::test_targets_single_dir\": 0.947497799999951,\r\n    \"tests/func/test_diff.py::test_targets_single_dir_with_file\": 0.9124318000000358,\r\n    \"tests/func/test_diff.py::test_targets_single_file\": 0.7914005999999745,\r\n    \"tests/func/test_diff.py::test_targets_single_file_in_dir\": 0.7911016000000473,\r\n    \"tests/func/test_diff.py::test_targets_single_file_in_dir_with_file\": 0.9484240999998974,\r\n    \"tests/func/test_diff.py::test_targets_two_files_in_dir\": 0.7935872000000472,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_directory[escape-**]\": 0.001256399999988389,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_directory[escape-*]\": 0.0013634000000593005,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_directory[escape-?]\": 0.001416800000015428,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_directory[escape-[!seq]]\": 0.43633040000008805,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_directory[escape-[seq]]\": 0.39428320000001804,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_directory[plain]\": 0.49833910000006654,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_file[escape-**]\": 0.001109700000029079,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_file[escape-*]\": 0.0011691999999356995,\r\n    
\"tests/func/test_download.py::test_lfs_prefetch_file[escape-?]\": 0.0011173999999982698,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_file[escape-[!seq]]\": 0.48147240000002967,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_file[escape-[seq]]\": 0.3986328999999387,\r\n    \"tests/func/test_download.py::test_lfs_prefetch_file[plain]\": 0.40905340000006163,\r\n    \"tests/func/test_du.py::test_du\": 2.0110667999999805,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_dos2unix\": 0.15745239999989735,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_dump_preserves_comments\": 0.1488390999999183,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_dump_preserves_desc\": 0.38740600000005543,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_dump_preserves_meta\": 0.36510500000008506,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_load_dump_stage_with_desc_meta\": 0.18181300000003375,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_load_with_plots\": 0.26786539999989145,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_try_dumping_parametrized_stage[data0-build-us]\": 0.15425709999999526,\r\n    \"tests/func/test_dvcfile.py::test_dvcfile_try_dumping_parametrized_stage[data1-build@us]\": 0.16851990000020578,\r\n    \"tests/func/test_dvcfile.py::test_has_stage_with_name\": 0.33740469999986544,\r\n    \"tests/func/test_dvcfile.py::test_load_all_multistage\": 0.5214114999999993,\r\n    \"tests/func/test_dvcfile.py::test_load_all_singlestage\": 0.23141340000006494,\r\n    \"tests/func/test_dvcfile.py::test_remove_stage\": 0.7379988999999796,\r\n    \"tests/func/test_dvcfile.py::test_remove_stage_dvcfiles\": 0.2416733000000022,\r\n    \"tests/func/test_dvcfile.py::test_remove_stage_lockfile\": 0.6938364000000092,\r\n    \"tests/func/test_dvcfile.py::test_remove_stage_on_lockfile_format_error\": 0.39975270000002183,\r\n    \"tests/func/test_dvcfile.py::test_remove_stage_preserves_comment\": 0.4730415000001358,\r\n    
\"tests/func/test_dvcfile.py::test_remove_stage_removes_dvcfiles_if_no_stages_left\": 0.3775637999999617,\r\n    \"tests/func/test_dvcfile.py::test_run_load_one_for_multistage\": 0.502129600000103,\r\n    \"tests/func/test_dvcfile.py::test_run_load_one_for_multistage_non_existing\": 0.2074472000000469,\r\n    \"tests/func/test_dvcfile.py::test_run_load_one_for_multistage_non_existing_stage_name\": 0.34883259999980964,\r\n    \"tests/func/test_dvcfile.py::test_run_load_one_on_single_stage\": 0.597080099999971,\r\n    \"tests/func/test_dvcfile.py::test_stage_collection\": 0.44439489999990656,\r\n    \"tests/func/test_dvcfile.py::test_try_get_single_stage_from_pipeline_file\": 0.2724720000001071,\r\n    \"tests/func/test_external_repo.py::test_cache_reused\": 1.383084300000064,\r\n    \"tests/func/test_external_repo.py::test_external_repo\": 1.2636050999999497,\r\n    \"tests/func/test_external_repo.py::test_known_sha\": 0.35369119999995746,\r\n    \"tests/func/test_external_repo.py::test_pull_subdir_file\": 0.5718703000000005,\r\n    \"tests/func/test_external_repo.py::test_relative_remote\": 0.973195500000088,\r\n    \"tests/func/test_external_repo.py::test_shallow_clone_branch\": 1.2217249000000265,\r\n    \"tests/func/test_external_repo.py::test_shallow_clone_tag\": 1.015111999999931,\r\n    \"tests/func/test_external_repo.py::test_source_change\": 0.7035343999998531,\r\n    \"tests/func/test_external_repo.py::test_subrepos_are_ignored\": 1.3591140999999425,\r\n    \"tests/func/test_external_repo.py::test_subrepos_are_ignored_for_git_tracked_dirs\": 1.0593133999998372,\r\n    \"tests/func/test_fs.py::test_cleanfs_subrepo\": 0.46248530000002575,\r\n    \"tests/func/test_fs.py::test_walk_dont_ignore_subrepos\": 0.6838972999998987,\r\n    \"tests/func/test_gc.py::test_all_commits\": 2.156316600000082,\r\n    \"tests/func/test_gc.py::test_date\": 2.269047599999908,\r\n    \"tests/func/test_gc.py::test_gc_all_experiments\": 0.6611699000000044,\r\n    
\"tests/func/test_gc.py::test_gc_api\": 0.3851829999999836,\r\n    \"tests/func/test_gc.py::test_gc_branches_tags\": 1.0996564000000717,\r\n    \"tests/func/test_gc.py::test_gc_cli\": 0.41177300000003925,\r\n    \"tests/func/test_gc.py::test_gc_cloud_positive\": 0.34791889999996783,\r\n    \"tests/func/test_gc.py::test_gc_cloud_remote_field\": 0.795977999999991,\r\n    \"tests/func/test_gc.py::test_gc_cloud_remove_order\": 0.8154675999999199,\r\n    \"tests/func/test_gc.py::test_gc_cloud_with_or_without_specifier\": 0.933874499999888,\r\n    \"tests/func/test_gc.py::test_gc_dry\": 0.32338210000000345,\r\n    \"tests/func/test_gc.py::test_gc_logging\": 0.31315829999994094,\r\n    \"tests/func/test_gc.py::test_gc_multiple_dvc_repos\": 0.8834275000000389,\r\n    \"tests/func/test_gc.py::test_gc_no_dir_cache\": 0.3972282999999379,\r\n    \"tests/func/test_gc.py::test_gc_no_unpacked_dir\": 0.23377059999995708,\r\n    \"tests/func/test_gc.py::test_gc_not_collect_pipeline_tracked_files\": 0.39117349999992257,\r\n    \"tests/func/test_gc.py::test_gc_not_in_remote\": 0.7354806999999255,\r\n    \"tests/func/test_gc.py::test_gc_not_in_remote_cloud\": 0.18755820000001222,\r\n    \"tests/func/test_gc.py::test_gc_not_in_remote_remote_arg\": 0.8360622999999805,\r\n    \"tests/func/test_gc.py::test_gc_not_in_remote_with_remote_field\": 0.6703756999999086,\r\n    \"tests/func/test_gc.py::test_gc_rev_num\": 0.7199834999998984,\r\n    \"tests/func/test_gc.py::test_gc_skip_failed\": 0.1434530999998742,\r\n    \"tests/func/test_gc.py::test_gc_with_possible_args_positive\": 0.3313971000000038,\r\n    \"tests/func/test_gc.py::test_gc_without_workspace[]\": 0.2372054999999591,\r\n    \"tests/func/test_gc.py::test_gc_without_workspace[c]\": 0.2266785000000482,\r\n    \"tests/func/test_gc.py::test_gc_without_workspace_on_tags_branches_commits\": 0.20780869999998686,\r\n    \"tests/func/test_gc.py::test_gc_without_workspace_raises_error\": 0.12934850000010556,\r\n    
\"tests/func/test_get.py::test_absolute_file_outside_git_repo\": 0.15017870000008315,\r\n    \"tests/func/test_get.py::test_absolute_file_outside_repo\": 0.38033600000005663,\r\n    \"tests/func/test_get.py::test_cache_type_is_properly_overridden\": 0.5857792000000472,\r\n    \"tests/func/test_get.py::test_get_a_dvc_file\": 0.20116699999994125,\r\n    \"tests/func/test_get.py::test_get_complete_repo\": 1.4647586999999476,\r\n    \"tests/func/test_get.py::test_get_file_from_dir\": 0.9006957999998804,\r\n    \"tests/func/test_get.py::test_get_from_non_dvc_master\": 0.6872383000001037,\r\n    \"tests/func/test_get.py::test_get_from_non_dvc_repo\": 0.2128956000000244,\r\n    \"tests/func/test_get.py::test_get_from_subrepos[files0-False]\": 0.7780077999999548,\r\n    \"tests/func/test_get.py::test_get_from_subrepos[files0-True]\": 0.8611526999999342,\r\n    \"tests/func/test_get.py::test_get_from_subrepos[files1-False]\": 0.7101842999999235,\r\n    \"tests/func/test_get.py::test_get_from_subrepos[files1-True]\": 0.8807715999998891,\r\n    \"tests/func/test_get.py::test_get_git_dir[erepo_dir]\": 0.38611629999991237,\r\n    \"tests/func/test_get.py::test_get_git_dir[git_dir]\": 0.2643609000000424,\r\n    \"tests/func/test_get.py::test_get_git_file[erepo_dir]\": 0.3733656000000565,\r\n    \"tests/func/test_get.py::test_get_git_file[git_dir]\": 0.2132688000001508,\r\n    \"tests/func/test_get.py::test_get_mixed_dir\": 0.5913230999999541,\r\n    \"tests/func/test_get.py::test_get_pipeline_tracked_outs\": 1.2631304000001364,\r\n    \"tests/func/test_get.py::test_get_repo_broken_dir\": 0.48770490000003974,\r\n    \"tests/func/test_get.py::test_get_repo_dir\": 0.5975047000000586,\r\n    \"tests/func/test_get.py::test_get_repo_file\": 0.5339506000000256,\r\n    \"tests/func/test_get.py::test_get_repo_file_no_override\": 0.8450057000001152,\r\n    \"tests/func/test_get.py::test_get_repo_file_with_override\": 0.9565097000000833,\r\n    
\"tests/func/test_get.py::test_get_repo_rev\": 0.47190690000013547,\r\n    \"tests/func/test_get.py::test_get_to_dir[.]\": 0.7166565999999648,\r\n    \"tests/func/test_get.py::test_get_to_dir[dir/subdir]\": 0.543013799999926,\r\n    \"tests/func/test_get.py::test_get_to_dir[dir]\": 0.4927519000000302,\r\n    \"tests/func/test_get.py::test_get_url_git_only_repo\": 0.20925550000004023,\r\n    \"tests/func/test_get.py::test_get_url_not_existing\": 0.39218749999997726,\r\n    \"tests/func/test_get.py::test_get_url_positive\": 0.6426764000000276,\r\n    \"tests/func/test_get.py::test_granular_get_from_subrepos\": 1.2335647000001018,\r\n    \"tests/func/test_get.py::test_non_cached_output\": 0.6284569000000602,\r\n    \"tests/func/test_get.py::test_unknown_path\": 0.2856510999999955,\r\n    \"tests/func/test_get_url.py::TestGetUrl::test_get_dir\": 0.02241720000006353,\r\n    \"tests/func/test_get_url.py::TestGetUrl::test_get_file\": 0.025068900000064787,\r\n    \"tests/func/test_get_url.py::TestGetUrl::test_get_url_nonexistent\": 0.012923699999987548,\r\n    \"tests/func/test_get_url.py::TestGetUrl::test_get_url_to_dir[.]\": 0.025865899999871544,\r\n    \"tests/func/test_get_url.py::TestGetUrl::test_get_url_to_dir[dir/subdir]\": 0.023055399999861947,\r\n    \"tests/func/test_get_url.py::TestGetUrl::test_get_url_to_dir[dir]\": 0.0350884000000633,\r\n    \"tests/func/test_get_url.py::test_get_dir\": 0.015264599999909478,\r\n    \"tests/func/test_get_url.py::test_get_file\": 0.014133099999980914,\r\n    \"tests/func/test_get_url.py::test_get_file_conflict_and_override\": 0.016250000000013642,\r\n    \"tests/func/test_get_url.py::test_get_url_config\": 0.1669968000001063,\r\n    \"tests/func/test_get_url.py::test_get_url_nonexistent\": 0.01504759999977523,\r\n    \"tests/func/test_get_url.py::test_get_url_to_dir[.]\": 0.02255650000006426,\r\n    \"tests/func/test_get_url.py::test_get_url_to_dir[dir/subdir]\": 0.021038400000179536,\r\n    
\"tests/func/test_get_url.py::test_get_url_to_dir[dir]\": 0.01743029999988721,\r\n    \"tests/func/test_ignore.py::test_dvcignore_in_out_dir\": 0.15225010000006023,\r\n    \"tests/func/test_ignore.py::test_ignore[\\\\u0442\\\\u0435\\\\u0441\\\\u0442]\": 0.14757190000000264,\r\n    \"tests/func/test_ignore.py::test_ignore[ignored]\": 0.15114039999991746,\r\n    \"tests/func/test_ignore.py::test_ignore_blank_line\": 0.18660330000000158,\r\n    \"tests/func/test_ignore.py::test_ignore_collecting_dvcignores[dir/subdir]\": 0.16490030000011302,\r\n    \"tests/func/test_ignore.py::test_ignore_collecting_dvcignores[dir]\": 0.14711049999993975,\r\n    \"tests/func/test_ignore.py::test_ignore_directory\": 0.1992541000000756,\r\n    \"tests/func/test_ignore.py::test_ignore_external\": 0.2136403000000655,\r\n    \"tests/func/test_ignore.py::test_ignore_file_in_parent_path[data_struct0-pattern_list0-result_set0]\": 0.1942944999999554,\r\n    \"tests/func/test_ignore.py::test_ignore_file_in_parent_path[data_struct1-pattern_list1-result_set1]\": 0.19079089999991083,\r\n    \"tests/func/test_ignore.py::test_ignore_file_in_parent_path[data_struct2-pattern_list2-result_set2]\": 0.2009209999999939,\r\n    \"tests/func/test_ignore.py::test_ignore_in_added_dir\": 0.3251916000000392,\r\n    \"tests/func/test_ignore.py::test_ignore_on_branch\": 0.35599500000000717,\r\n    \"tests/func/test_ignore.py::test_ignore_resurface_subrepo\": 0.6824492999999165,\r\n    \"tests/func/test_ignore.py::test_ignore_sub_directory\": 0.20163170000012087,\r\n    \"tests/func/test_ignore.py::test_ignored_output\": 0.48675440000010894,\r\n    \"tests/func/test_ignore.py::test_ignored_output_nested\": 0.4683974000000717,\r\n    \"tests/func/test_ignore.py::test_match_nested\": 0.1366146000000299,\r\n    \"tests/func/test_ignore.py::test_multi_ignore_file\": 0.18268860000000586,\r\n    \"tests/func/test_ignore.py::test_pattern_trie_fs\": 0.16440320000015163,\r\n    
\"tests/func/test_ignore.py::test_pull_ignore\": 0.7683907000000545,\r\n    \"tests/func/test_ignore.py::test_remove_file\": 0.1357917999999927,\r\n    \"tests/func/test_ignore.py::test_remove_ignored_file\": 0.1405205000000933,\r\n    \"tests/func/test_ignore.py::test_rename_file\": 0.14469229999986055,\r\n    \"tests/func/test_ignore.py::test_rename_ignored_file\": 0.14287149999984194,\r\n    \"tests/func/test_ignore.py::test_run_dvcignored_dep\": 0.33406170000000657,\r\n    \"tests/func/test_ignore.py::test_walk\": 0.16399349999994683,\r\n    \"tests/func/test_import.py::test_cache_type_is_properly_overridden\": 0.9056994000000032,\r\n    \"tests/func/test_import.py::test_chained_import\": 2.625762800000075,\r\n    \"tests/func/test_import.py::test_granular_import_from_subrepos\": 1.4031246000000692,\r\n    \"tests/func/test_import.py::test_import\": 0.8625809999999774,\r\n    \"tests/func/test_import.py::test_import_broken_dir\": 0.7567731999999978,\r\n    \"tests/func/test_import.py::test_import_cached_file\": 1.0863352999999734,\r\n    \"tests/func/test_import.py::test_import_complete_repo\": 2.6308554000000868,\r\n    \"tests/func/test_import.py::test_import_configs[options0-def_repo0]\": 0.5693571000001612,\r\n    \"tests/func/test_import.py::test_import_configs[options1-def_repo1]\": 0.575264099999913,\r\n    \"tests/func/test_import.py::test_import_configs[options2-def_repo2]\": 0.5618485999999621,\r\n    \"tests/func/test_import.py::test_import_configs[options3-def_repo3]\": 0.5637469000000692,\r\n    \"tests/func/test_import.py::test_import_configs[options4-def_repo4]\": 0.7436949999998888,\r\n    \"tests/func/test_import.py::test_import_dir\": 0.9349176000000625,\r\n    \"tests/func/test_import.py::test_import_file_from_dir\": 2.0776521000001367,\r\n    \"tests/func/test_import.py::test_import_file_from_dir_to_dir\": 0.9712954999999965,\r\n    \"tests/func/test_import.py::test_import_from_bare_git_repo\": 1.746106199999872,\r\n    
\"tests/func/test_import.py::test_import_git_dir[False]\": 0.5802926999999727,\r\n    \"tests/func/test_import.py::test_import_git_dir[True]\": 0.9223047000000406,\r\n    \"tests/func/test_import.py::test_import_git_file[False]\": 0.6169221000001244,\r\n    \"tests/func/test_import.py::test_import_git_file[True]\": 0.9858597999998437,\r\n    \"tests/func/test_import.py::test_import_invalid_configs\": 0.6348908999999594,\r\n    \"tests/func/test_import.py::test_import_mixed_dir\": 1.0006189000000631,\r\n    \"tests/func/test_import.py::test_import_no_download\": 0.8493882999999869,\r\n    \"tests/func/test_import.py::test_import_non_cached\": 0.8062373000000207,\r\n    \"tests/func/test_import.py::test_import_non_existing\": 0.5482246999999916,\r\n    \"tests/func/test_import.py::test_import_pipeline_tracked_outs\": 1.8464855999999372,\r\n    \"tests/func/test_import.py::test_import_rev\": 0.8083298000000241,\r\n    \"tests/func/test_import.py::test_import_subrepos[files0-False]\": 1.3250576000000365,\r\n    \"tests/func/test_import.py::test_import_subrepos[files0-True]\": 1.2777296000000433,\r\n    \"tests/func/test_import.py::test_import_subrepos[files1-False]\": 1.2118335000001252,\r\n    \"tests/func/test_import.py::test_import_subrepos[files1-True]\": 1.265125600000033,\r\n    \"tests/func/test_import.py::test_import_to_dir[.]\": 1.1777158999999529,\r\n    \"tests/func/test_import.py::test_import_to_dir[dir/subdir]\": 0.8869288999999299,\r\n    \"tests/func/test_import.py::test_import_to_dir[dir]\": 0.7965009999999211,\r\n    \"tests/func/test_import.py::test_import_with_jobs\": 0.8842647999999826,\r\n    \"tests/func/test_import.py::test_import_with_no_exec\": 0.3832912999999962,\r\n    \"tests/func/test_import.py::test_local_import\": 1.080212700000061,\r\n    \"tests/func/test_import.py::test_parameterized_repo[paths0]\": 1.0523322999999891,\r\n    \"tests/func/test_import.py::test_parameterized_repo[paths1]\": 0.9154290000000174,\r\n    
\"tests/func/test_import.py::test_pull_import_no_download\": 1.3039238999999725,\r\n    \"tests/func/test_import.py::test_pull_import_no_download_rev_lock\": 1.1029234000000088,\r\n    \"tests/func/test_import.py::test_pull_imported_directory_stage[dir/]\": 1.3592204999997648,\r\n    \"tests/func/test_import.py::test_pull_imported_directory_stage[dir]\": 1.130154100000027,\r\n    \"tests/func/test_import.py::test_pull_imported_stage\": 1.1966352000000597,\r\n    \"tests/func/test_import.py::test_pull_imported_stage_from_subrepos[files0-False]\": 1.331404799999973,\r\n    \"tests/func/test_import.py::test_pull_imported_stage_from_subrepos[files0-True]\": 1.6920185000000174,\r\n    \"tests/func/test_import.py::test_pull_imported_stage_from_subrepos[files1-False]\": 1.4010470000000623,\r\n    \"tests/func/test_import.py::test_pull_imported_stage_from_subrepos[files1-True]\": 1.5626003999999512,\r\n    \"tests/func/test_import.py::test_pull_no_rev_lock\": 1.0278057000000445,\r\n    \"tests/func/test_import.py::test_pull_non_workspace\": 1.6440565000000333,\r\n    \"tests/func/test_import.py::test_pull_wildcard_imported_directory_stage\": 1.1825951000000714,\r\n    \"tests/func/test_import.py::test_push_wildcard_from_bare_git_repo\": 1.8283545999998978,\r\n    \"tests/func/test_import.py::test_reimport\": 1.1143974999999955,\r\n    \"tests/func/test_import_db.py::test[args0-results-csv]\": 0.6439319000000978,\r\n    \"tests/func/test_import_db.py::test[args0-results-json]\": 0.4460628000000497,\r\n    \"tests/func/test_import_db.py::test[args1-model-csv]\": 0.4647165000001223,\r\n    \"tests/func/test_import_db.py::test[args1-model-json]\": 0.46484750000001895,\r\n    \"tests/func/test_import_url.py::TestImport::test_import\": 0.28322309999987283,\r\n    \"tests/func/test_import_url.py::TestImport::test_import_dir\": 0.3263785999998845,\r\n    \"tests/func/test_import_url.py::TestImport::test_import_empty_dir\": 0.2588596000000507,\r\n    
\"tests/func/test_import_url.py::test_cmd_import\": 0.243037500000014,\r\n    \"tests/func/test_import_url.py::test_cmd_unsupported_scheme\": 0.1825516000000107,\r\n    \"tests/func/test_import_url.py::test_default_output\": 0.25000760000011724,\r\n    \"tests/func/test_import_url.py::test_import_conflict_and_override\": 0.29910240000015165,\r\n    \"tests/func/test_import_url.py::test_import_stage_accompanies_target\": 0.7743165999999064,\r\n    \"tests/func/test_import_url.py::test_import_url_fs_config\": 0.41629649999993035,\r\n    \"tests/func/test_import_url.py::test_import_url_no_download\": 0.4691332999999531,\r\n    \"tests/func/test_import_url.py::test_import_url_nonexistent\": 0.3444412999999713,\r\n    \"tests/func/test_import_url.py::test_import_url_preserve_fields\": 0.2801150000001371,\r\n    \"tests/func/test_import_url.py::test_import_url_to_dir[.]\": 0.3046689000000242,\r\n    \"tests/func/test_import_url.py::test_import_url_to_dir[dir/subdir]\": 0.3088328000000047,\r\n    \"tests/func/test_import_url.py::test_import_url_to_dir[dir]\": 0.2801137999998673,\r\n    \"tests/func/test_import_url.py::test_import_url_to_remote_absolute\": 0.4061252000000195,\r\n    \"tests/func/test_import_url.py::test_import_url_to_remote_invalid_combinations\": 0.1392080000000533,\r\n    \"tests/func/test_import_url.py::test_import_url_to_remote_status\": 0.523811300000034,\r\n    \"tests/func/test_import_url.py::test_import_url_with_no_exec\": 0.3659144999999171,\r\n    \"tests/func/test_import_url.py::test_partial_import_pull\": 0.925487999999973,\r\n    \"tests/func/test_import_url.py::test_should_remove_outs_before_import\": 0.4517859000000044,\r\n    \"tests/func/test_init.py::test_allow_init_dvc_subdir\": 0.36744199999986904,\r\n    \"tests/func/test_init.py::test_api_init\": 0.41764749999993,\r\n    \"tests/func/test_init.py::test_cli_init\": 0.3148900999998432,\r\n    \"tests/func/test_init.py::test_double_init\": 0.8229186999999456,\r\n    
\"tests/func/test_init.py::test_gen_dvcignore\": 0.25045269999986886,\r\n    \"tests/func/test_init.py::test_init_no_scm_api\": 0.2666681000000608,\r\n    \"tests/func/test_init.py::test_init_no_scm_cli\": 0.23497389999999996,\r\n    \"tests/func/test_init.py::test_init_no_scm_fail_api\": 0.011122799999952804,\r\n    \"tests/func/test_init.py::test_init_no_scm_fail_cli\": 0.026207399999975678,\r\n    \"tests/func/test_init.py::test_init_quiet_should_not_display_welcome_screen\": 0.31500000000005457,\r\n    \"tests/func/test_init.py::test_init_when_ignored_by_git\": 0.03822130000003199,\r\n    \"tests/func/test_init.py::test_subdir_init_no_option\": 0.034765399999969304,\r\n    \"tests/func/test_install.py::TestInstall::test_create_hooks\": 0.001248400000008587,\r\n    \"tests/func/test_install.py::TestInstall::test_fail_if_hook_exists\": 0.0011312999999972817,\r\n    \"tests/func/test_install.py::TestInstall::test_install_pre_commit_tool\": 0.0011799000000110027,\r\n    \"tests/func/test_install.py::TestInstall::test_post_checkout\": 0.0011200999999800842,\r\n    \"tests/func/test_install.py::TestInstall::test_pre_commit_hook\": 0.0011349999999765714,\r\n    \"tests/func/test_install.py::TestInstall::test_pre_push_hook\": 0.0011102999999366148,\r\n    \"tests/func/test_install.py::test_merge_driver\": 0.0011039000000891974,\r\n    \"tests/func/test_install.py::test_merge_driver_no_ancestor\": 0.0011172000000669868,\r\n    \"tests/func/test_lock.py::test_cli\": 0.30169069999999465,\r\n    \"tests/func/test_lock.py::test_unlock_lock_failed\": 0.19527419999997164,\r\n    \"tests/func/test_lock.py::test_unlock_unlocked_raises\": 0.0037138999999797306,\r\n    \"tests/func/test_lock.py::test_with\": 0.19908450000014,\r\n    \"tests/func/test_lockfile.py::test_cmd_changes_other_orders_are_preserved\": 0.7635403999998971,\r\n    \"tests/func/test_lockfile.py::test_deps_outs_are_sorted_by_path\": 0.4513723000000027,\r\n    
\"tests/func/test_lockfile.py::test_order_is_preserved_when_pipeline_order_changes\": 1.3011747000000469,\r\n    \"tests/func/test_lockfile.py::test_params_dump\": 1.2367351999999983,\r\n    \"tests/func/test_ls.py::test_broken_symlink\": 0.23077140000009422,\r\n    \"tests/func/test_ls.py::test_ls_broken_dir\": 0.8679249000001619,\r\n    \"tests/func/test_ls.py::test_ls_granular\": 0.6666682000000037,\r\n    \"tests/func/test_ls.py::test_ls_not_existed_url\": 0.04265199999997549,\r\n    \"tests/func/test_ls.py::test_ls_remote_git_only_repo_recursive\": 0.44433429999992313,\r\n    \"tests/func/test_ls.py::test_ls_remote_repo\": 0.9113675999999487,\r\n    \"tests/func/test_ls.py::test_ls_remote_repo_recursive\": 0.8802007000000458,\r\n    \"tests/func/test_ls.py::test_ls_remote_repo_with_path_dir\": 1.0943083999999317,\r\n    \"tests/func/test_ls.py::test_ls_remote_repo_with_rev\": 0.8788874000000533,\r\n    \"tests/func/test_ls.py::test_ls_remote_repo_with_rev_recursive\": 0.9485074999998915,\r\n    \"tests/func/test_ls.py::test_ls_repo\": 0.8172812999999906,\r\n    \"tests/func/test_ls.py::test_ls_repo_dvc_only_recursive\": 1.0351157000000057,\r\n    \"tests/func/test_ls.py::test_ls_repo_recursive\": 0.8275060999998232,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_file_path_fs\": 0.8277243999998518,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_missed_path\": 0.8948088000001917,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_missed_path_dvc_only\": 0.873964800000067,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_new_path_dir\": 0.7521163999999771,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_path_dir\": 0.8435791999999083,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_path_dir_dvc_only_empty\": 1.5219220000000178,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_path_file_out\": 0.89468480000005,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_path_subdir\": 0.8051321000000371,\r\n    
\"tests/func/test_ls.py::test_ls_repo_with_path_subdir_dvc_only\": 0.9938705999999229,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_path_subdir_dvc_only_recursive\": 0.9960608000000093,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_removed_dvc_dir\": 0.849593900000059,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_removed_dvc_dir_recursive\": 1.0610984000001054,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_removed_dvc_dir_with_path_dir\": 0.8317214999999578,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_removed_dvc_dir_with_path_file\": 0.891010499999993,\r\n    \"tests/func/test_ls.py::test_ls_repo_with_rev\": 0.8284595999999738,\r\n    \"tests/func/test_ls.py::test_ls_shows_pipeline_tracked_outs\": 0.6767601999999897,\r\n    \"tests/func/test_ls.py::test_ls_target[False]\": 1.0040828999999576,\r\n    \"tests/func/test_ls.py::test_ls_target[True]\": 0.5304179000000886,\r\n    \"tests/func/test_ls.py::test_subrepo[False-git_dir]\": 1.3562943999999106,\r\n    \"tests/func/test_ls.py::test_subrepo[True-erepo_dir]\": 2.086549600000012,\r\n    \"tests/func/test_ls_url.py::TestLsUrl::test_dir\": 0.015517799999884119,\r\n    \"tests/func/test_ls_url.py::TestLsUrl::test_file[dir/foo]\": 0.010514699999930599,\r\n    \"tests/func/test_ls_url.py::TestLsUrl::test_file[foo.dvc]\": 0.01171199999998862,\r\n    \"tests/func/test_ls_url.py::TestLsUrl::test_file[foo]\": 0.012536199999999553,\r\n    \"tests/func/test_ls_url.py::TestLsUrl::test_nonexistent\": 0.011345700000106262,\r\n    \"tests/func/test_ls_url.py::TestLsUrl::test_recursive\": 0.021404200000006313,\r\n    \"tests/func/test_ls_url.py::test_ls_url_config\": 0.2156835000000683,\r\n    \"tests/func/test_merge_driver.py::test_merge[None-None-their6-merged6]\": 0.3963009000000284,\r\n    \"tests/func/test_merge_driver.py::test_merge[None-our5-their5-merged5]\": 0.5082504000001791,\r\n    \"tests/func/test_merge_driver.py::test_merge[None-our7-None-merged7]\": 0.38870360000009896,\r\n    
\"tests/func/test_merge_driver.py::test_merge[ancestor0-our0-their0-merged0]\": 0.8395247000000836,\r\n    \"tests/func/test_merge_driver.py::test_merge[ancestor1-our1-their1-merged1]\": 0.6888646999999537,\r\n    \"tests/func/test_merge_driver.py::test_merge[ancestor10-our10-their10-merged10]\": 0.7417891999999711,\r\n    \"tests/func/test_merge_driver.py::test_merge[ancestor2-our2-their2-merged2]\": 0.6183836999998675,\r\n    \"tests/func/test_merge_driver.py::test_merge[ancestor3-our3-their3-merged3]\": 0.5287539000000834,\r\n    \"tests/func/test_merge_driver.py::test_merge[ancestor4-our4-their4-merged4]\": 0.5320171999998138,\r\n    \"tests/func/test_merge_driver.py::test_merge[ancestor8-our8-their8-merged8]\": 0.6128427999999531,\r\n    \"tests/func/test_merge_driver.py::test_merge[ancestor9-our9-their9-merged9]\": 0.6826347999999598,\r\n    \"tests/func/test_merge_driver.py::test_merge_conflict[ancestor0-our0-their0-unable to auto-merge the following paths:\\\\nfoo]\": 0.4920667000000094,\r\n    \"tests/func/test_merge_driver.py::test_merge_conflict[ancestor1-our1-their1-unable to auto-merge the following paths:\\\\nboth deleted: ('foo',)]\": 0.5235576999998557,\r\n    \"tests/func/test_merge_driver.py::test_merge_different_output_options\": 0.23589609999999084,\r\n    \"tests/func/test_merge_driver.py::test_merge_file\": 0.20629750000000513,\r\n    \"tests/func/test_merge_driver.py::test_merge_non_dvc_add\": 0.2590516000001344,\r\n    \"tests/func/test_move.py::test_cmd_move\": 0.4135996999999634,\r\n    \"tests/func/test_move.py::test_move\": 0.29322739999986425,\r\n    \"tests/func/test_move.py::test_move_directory\": 0.2985218999999688,\r\n    \"tests/func/test_move.py::test_move_directory_should_not_overwrite_existing\": 0.4510453000001462,\r\n    \"tests/func/test_move.py::test_move_file_between_directories\": 0.41026959999999235,\r\n    \"tests/func/test_move.py::test_move_file_inside_directory\": 0.42602529999999206,\r\n    
\"tests/func/test_move.py::test_move_file_to_directory\": 0.38129300000002786,\r\n    \"tests/func/test_move.py::test_move_file_to_directory_without_specified_target_name\": 0.3573622000000114,\r\n    \"tests/func/test_move.py::test_move_file_with_extension\": 0.3254477000001543,\r\n    \"tests/func/test_move.py::test_move_gitignored\": 0.2915448999998489,\r\n    \"tests/func/test_move.py::test_move_meta\": 0.25579289999996035,\r\n    \"tests/func/test_move.py::test_move_non_existent_file\": 0.13955350000003364,\r\n    \"tests/func/test_move.py::test_move_not_data_source\": 0.42298670000002403,\r\n    \"tests/func/test_move.py::test_move_output_overlap\": 0.2740764999999783,\r\n    \"tests/func/test_move.py::test_move_should_save_stage_info\": 0.3627708999999868,\r\n    \"tests/func/test_move.py::test_should_move_to_dir_on_non_default_stage_file\": 0.2742161000001033,\r\n    \"tests/func/test_odb.py::test_cache\": 0.15525479999996605,\r\n    \"tests/func/test_odb.py::test_cache_link_type\": 0.27838630000007925,\r\n    \"tests/func/test_odb.py::test_cache_load_bad_dir_cache\": 0.14457310000011603,\r\n    \"tests/func/test_odb.py::test_cmd_cache_abs_path\": 0.20145130000003064,\r\n    \"tests/func/test_odb.py::test_cmd_cache_dir\": 0.17947869999989052,\r\n    \"tests/func/test_odb.py::test_cmd_cache_relative_path\": 0.2748297000000548,\r\n    \"tests/func/test_odb.py::test_default_cache_type\": 0.13929650000011407,\r\n    \"tests/func/test_odb.py::test_external_cache_dir\": 0.31341150000002926,\r\n    \"tests/func/test_odb.py::test_shared_cache[False]\": 0.0012287999999216481,\r\n    \"tests/func/test_odb.py::test_shared_cache[True]\": 0.0011681999999382242,\r\n    \"tests/func/test_odb.py::test_shared_cache_dir\": 0.8903651000000536,\r\n    \"tests/func/test_remote.py::test_dir_hash_should_be_key_order_agnostic\": 0.12442099999987022,\r\n    \"tests/func/test_remote.py::test_modify_missing_remote\": 0.14263769999990927,\r\n    
\"tests/func/test_remote.py::test_partial_push_n_pull\": 1.0375717000000577,\r\n    \"tests/func/test_remote.py::test_protect_local_remote\": 0.558463000000188,\r\n    \"tests/func/test_remote.py::test_push_incomplete_dir\": 0.5540922999999793,\r\n    \"tests/func/test_remote.py::test_push_order\": 0.5294410999999855,\r\n    \"tests/func/test_remote.py::test_raise_on_too_many_open_files\": 0.34681590000002416,\r\n    \"tests/func/test_remote.py::test_referencing_other_remotes\": 0.20356249999997544,\r\n    \"tests/func/test_remote.py::test_remote\": 0.2198851000000559,\r\n    \"tests/func/test_remote.py::test_remote_add_relative_path\": 0.1899215999999342,\r\n    \"tests/func/test_remote.py::test_remote_default\": 0.17432330000008278,\r\n    \"tests/func/test_remote.py::test_remote_default_cmd\": 0.22403440000005048,\r\n    \"tests/func/test_remote.py::test_remote_duplicated\": 0.16780879999998888,\r\n    \"tests/func/test_remote.py::test_remote_modify_default\": 0.16813999999988027,\r\n    \"tests/func/test_remote.py::test_remote_modify_local_on_repo_config\": 0.14979379999988396,\r\n    \"tests/func/test_remote.py::test_remote_modify_unset\": 0.16284140000004754,\r\n    \"tests/func/test_remote.py::test_remote_modify_validation\": 0.1460834000001796,\r\n    \"tests/func/test_remote.py::test_remote_overwrite\": 0.19935130000010304,\r\n    \"tests/func/test_remote.py::test_remote_remove\": 0.25832579999996597,\r\n    \"tests/func/test_remote.py::test_remote_rename\": 0.16758230000004914,\r\n    \"tests/func/test_remote.py::test_remove_default\": 0.22759759999996731,\r\n    \"tests/func/test_remote.py::test_show_default\": 0.24933369999996557,\r\n    \"tests/func/test_remote.py::test_upper_case_remote\": 0.7164872000000742,\r\n    \"tests/func/test_remove.py::test_cmd_remove\": 0.4812683999999763,\r\n    \"tests/func/test_remove.py::test_cmd_remove_gitignore_multistage\": 1.0105980000000727,\r\n    
\"tests/func/test_remove.py::test_cmd_remove_gitignore_single_stage\": 0.4817467000000306,\r\n    \"tests/func/test_remove.py::test_remove[False]\": 0.8319298999999774,\r\n    \"tests/func/test_remove.py::test_remove[True]\": 0.9560232000000042,\r\n    \"tests/func/test_remove.py::test_remove_broken_symlink\": 0.2596629000000803,\r\n    \"tests/func/test_remove.py::test_remove_file_target\": 0.2195297999999184,\r\n    \"tests/func/test_remove.py::test_remove_non_existent_file\": 0.16874089999987518,\r\n    \"tests/func/test_repo.py::test_destroy\": 0.9150323999999728,\r\n    \"tests/func/test_repo.py::test_open_bare\": 1.818128500000057,\r\n    \"tests/func/test_repo_index.py::test_data_index\": 2.5113304999999855,\r\n    \"tests/func/test_repo_index.py::test_deps_outs_getters\": 0.6611544999999523,\r\n    \"tests/func/test_repo_index.py::test_ignored_dir_unignored_pattern\": 0.3130208000000039,\r\n    \"tests/func/test_repo_index.py::test_index\": 0.5706569999999829,\r\n    \"tests/func/test_repo_index.py::test_param_keys_no_params\": 0.142780499999958,\r\n    \"tests/func/test_repo_index.py::test_param_keys_returns_default_file\": 0.17251450000003388,\r\n    \"tests/func/test_repo_index.py::test_param_keys_top_level_params\": 0.1507502999999133,\r\n    \"tests/func/test_repo_index.py::test_repr\": 0.4476339000001417,\r\n    \"tests/func/test_repo_index.py::test_skip_graph_checks\": 0.1436386999999968,\r\n    \"tests/func/test_repo_index.py::test_update\": 0.1739161000000422,\r\n    \"tests/func/test_repo_index.py::test_used_objs\": 0.914983799999959,\r\n    \"tests/func/test_repo_index.py::test_view_brancher\": 0.4640120999999908,\r\n    \"tests/func/test_repo_index.py::test_view_combined_filter\": 0.6019791999999597,\r\n    \"tests/func/test_repo_index.py::test_view_granular_dir\": 0.6928808000000117,\r\n    \"tests/func/test_repo_index.py::test_view_onerror\": 0.4158800999999812,\r\n    \"tests/func/test_repo_index.py::test_view_outs_filter\": 
0.7267402999998467,\r\n    \"tests/func/test_repo_index.py::test_view_stage_filter\": 0.6276586000000179,\r\n    \"tests/func/test_repo_index.py::test_with_gitignore\": 0.32279620000008435,\r\n    \"tests/func/test_root.py::test_root\": 0.1449580999999398,\r\n    \"tests/func/test_root.py::test_root_locked\": 0.16051739999988968,\r\n    \"tests/func/test_run.py::TestCmdRunWorkingDirectory::test_default_wdir_is_not_written\": 0.35049709999998413,\r\n    \"tests/func/test_run.py::TestCmdRunWorkingDirectory::test_fname_changes_path_and_wdir\": 0.28195549999998093,\r\n    \"tests/func/test_run.py::TestRunBadWdir::test\": 0.18590370000003986,\r\n    \"tests/func/test_run.py::TestRunBadWdir::test_not_dir\": 0.17233740000006037,\r\n    \"tests/func/test_run.py::TestRunBadWdir::test_not_found\": 0.14417720000017198,\r\n    \"tests/func/test_run.py::TestRunBadWdir::test_same_prefix\": 0.15853690000017195,\r\n    \"tests/func/test_run.py::TestRunCircularDependency::test\": 0.21522169999991547,\r\n    \"tests/func/test_run.py::TestRunCircularDependency::test_graph\": 0.5327369000000317,\r\n    \"tests/func/test_run.py::TestRunCircularDependency::test_non_normalized_paths\": 0.19097459999989042,\r\n    \"tests/func/test_run.py::TestRunCircularDependency::test_outs_no_cache\": 0.19103880000000117,\r\n    \"tests/func/test_run.py::TestRunDuplicatedArguments::test\": 0.16231089999996584,\r\n    \"tests/func/test_run.py::TestRunDuplicatedArguments::test_non_normalized_paths\": 0.17710330000011254,\r\n    \"tests/func/test_run.py::TestRunDuplicatedArguments::test_outs_no_cache\": 0.16176649999999881,\r\n    \"tests/func/test_run.py::test_always_changed\": 0.39973380000003544,\r\n    \"tests/func/test_run.py::test_dump_on_non_cached_outputs\": 0.34675960000004125,\r\n    \"tests/func/test_run.py::test_graph\": 0.5643173000000843,\r\n    \"tests/func/test_run.py::test_metrics_dir[metrics]\": 0.5072695000001204,\r\n    \"tests/func/test_run.py::test_metrics_dir[metrics_no_cache]\": 
0.4531345999998848,\r\n    \"tests/func/test_run.py::test_rerun_callback\": 0.22751670000002377,\r\n    \"tests/func/test_run.py::test_rerun_changed_dep\": 0.5991066000000274,\r\n    \"tests/func/test_run.py::test_rerun_changed_out\": 0.3850783999999976,\r\n    \"tests/func/test_run.py::test_rerun_changed_stage\": 0.3589001999999937,\r\n    \"tests/func/test_run.py::test_rerun_deterministic\": 0.6178350000001274,\r\n    \"tests/func/test_run.py::test_rerun_deterministic_ignore_cache\": 0.5254332999998041,\r\n    \"tests/func/test_run.py::test_run\": 0.44643459999997503,\r\n    \"tests/func/test_run.py::test_run_already_exists\": 0.6263374000000113,\r\n    \"tests/func/test_run.py::test_run_cached\": 0.6409001000001808,\r\n    \"tests/func/test_run.py::test_run_dump\": 0.5559751000000688,\r\n    \"tests/func/test_run.py::test_run_empty\": 0.17519049999998515,\r\n    \"tests/func/test_run.py::test_run_external_outputs\": 0.2546858999999131,\r\n    \"tests/func/test_run.py::test_run_missing_dep\": 0.16516739999997299,\r\n    \"tests/func/test_run.py::test_run_no_exec\": 0.3877132999998594,\r\n    \"tests/func/test_run.py::test_run_overwrite_order\": 0.8002424999999675,\r\n    \"tests/func/test_run.py::test_run_overwrite_preserves_meta_and_comment\": 0.5934765999999172,\r\n    \"tests/func/test_run.py::test_run_params_custom_file\": 0.23986200000001645,\r\n    \"tests/func/test_run.py::test_run_params_default\": 0.2401186999999254,\r\n    \"tests/func/test_run.py::test_run_params_no_exec\": 0.17146010000010392,\r\n    \"tests/func/test_run.py::test_run_remove_outs\": 0.3511172999999417,\r\n    \"tests/func/test_run.py::test_run_repeat\": 0.8495632000001478,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[#]\": 0.15131850000000213,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[$]\": 0.18822580000005473,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[,]\": 0.18252570000004198,\r\n    
\"tests/func/test_run.py::test_run_with_invalid_stage_name[.]\": 0.19337220000011257,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[/]\": 0.14692950000005567,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[:]\": 0.16086330000007365,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[;]\": 0.2369408999999223,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[@:]\": 0.15973589999998694,\r\n    \"tests/func/test_run.py::test_run_with_invalid_stage_name[\\\\\\\\]\": 0.16169760000002498,\r\n    \"tests/func/test_run.py::test_run_with_name_having_hyphen_underscore\": 0.47360779999996794,\r\n    \"tests/func/test_run.py::test_run_without_cmd[kwargs0]\": 0.19577829999991536,\r\n    \"tests/func/test_run.py::test_run_without_cmd[kwargs1]\": 0.1459709999999177,\r\n    \"tests/func/test_run.py::test_should_not_checkout_upon_corrupted_local_hardlink_cache\": 0.4960602999999537,\r\n    \"tests/func/test_run.py::test_should_raise_on_overlapping_output_paths\": 0.2538355000000365,\r\n    \"tests/func/test_run.py::test_with_wdir\": 0.3469372999999223,\r\n    \"tests/func/test_run_cache.py::test_do_not_save_on_no_exec_and_dry\": 0.2275765000000547,\r\n    \"tests/func/test_run_cache.py::test_memory_for_multiple_runs_of_same_stage\": 0.8523804000000155,\r\n    \"tests/func/test_run_cache.py::test_memory_runs_of_multiple_stages\": 0.8905641000001197,\r\n    \"tests/func/test_run_cache.py::test_newest_entry_is_loaded_for_non_deterministic_stage\": 1.1082949999999983,\r\n    \"tests/func/test_run_cache.py::test_outs_no_cache_deactivate_run_cache[metrics_no_cache-True]\": 0.4292524999999614,\r\n    \"tests/func/test_run_cache.py::test_outs_no_cache_deactivate_run_cache[outs_no_cache-False]\": 0.3989267999999129,\r\n    \"tests/func/test_run_cache.py::test_outs_no_cache_deactivate_run_cache[plots_no_cache-True]\": 0.4542271999998775,\r\n    \"tests/func/test_run_cache.py::test_push_pull\": 0.8825567999999748,\r\n    
\"tests/func/test_run_cache.py::test_push_pull_unsupported\": 0.6354907000001049,\r\n    \"tests/func/test_run_cache.py::test_restore\": 0.47588069999994786,\r\n    \"tests/func/test_run_cache.py::test_restore_pull\": 0.7859493000000839,\r\n    \"tests/func/test_run_cache.py::test_save\": 0.37583450000011,\r\n    \"tests/func/test_scm.py::test_init_git\": 0.09183709999990697,\r\n    \"tests/func/test_scm.py::test_init_no_git\": 0.018588600000043698,\r\n    \"tests/func/test_scm.py::test_init_none\": 0.013314399999899251,\r\n    \"tests/func/test_scm.py::test_init_sub_dir\": 0.10799759999997605,\r\n    \"tests/func/test_scm.py::test_lfs_prefetch\": 0.3691949999998769,\r\n    \"tests/func/test_scm_context.py::test_scm_context_autostage\": 0.30232489999991685,\r\n    \"tests/func/test_scm_context.py::test_scm_context_ignore\": 0.27744780000011815,\r\n    \"tests/func/test_scm_context.py::test_scm_context_ignore_remove\": 0.19912219999991976,\r\n    \"tests/func/test_scm_context.py::test_scm_context_no_track_on_ignore_remove\": 0.19690459999992527,\r\n    \"tests/func/test_scm_context.py::test_scm_context_try_ignore_remove_non_existing_entry\": 0.20904159999997773,\r\n    \"tests/func/test_scm_context.py::test_scm_context_when_already_ignored\": 0.254342500000007,\r\n    \"tests/func/test_stage.py::test_cmd_obj\": 0.005093600000009246,\r\n    \"tests/func/test_stage.py::test_collect_symlink[False]\": 0.27943709999988187,\r\n    \"tests/func/test_stage.py::test_collect_symlink[True]\": 0.22402759999999944,\r\n    \"tests/func/test_stage.py::test_default_wdir_ignored_in_checksum\": 0.3386774000000514,\r\n    \"tests/func/test_stage.py::test_empty_list\": 0.003349600000092323,\r\n    \"tests/func/test_stage.py::test_external_remote_dependency_resolution\": 0.24288849999993545,\r\n    \"tests/func/test_stage.py::test_external_remote_output_resolution\": 0.22982309999997597,\r\n    \"tests/func/test_stage.py::test_list\": 0.0035652999999911117,\r\n    
\"tests/func/test_stage.py::test_md5_ignores_annotations\": 0.14674890000014784,\r\n    \"tests/func/test_stage.py::test_md5_ignores_comments\": 0.2633077999998932,\r\n    \"tests/func/test_stage.py::test_meta_desc_is_preserved\": 0.1833113999998659,\r\n    \"tests/func/test_stage.py::test_no_cmd\": 0.003441100000031838,\r\n    \"tests/func/test_stage.py::test_none\": 0.003471899999794914,\r\n    \"tests/func/test_stage.py::test_object\": 0.0035700000000815635,\r\n    \"tests/func/test_stage.py::test_parent_repo_collect_stages\": 0.9687402000001839,\r\n    \"tests/func/test_stage.py::test_reload\": 0.22573539999996228,\r\n    \"tests/func/test_stage.py::test_stage_add_duplicated_output\": 0.31772509999996146,\r\n    \"tests/func/test_stage.py::test_stage_on_no_path_string_repr\": 0.19597939999994196,\r\n    \"tests/func/test_stage.py::test_stage_remove_pipeline_stage\": 0.6968345000000227,\r\n    \"tests/func/test_stage.py::test_stage_remove_pointer_stage\": 0.29759790000002795,\r\n    \"tests/func/test_stage.py::test_stage_strings_representation\": 0.4293736000000763,\r\n    \"tests/func/test_stage_load.py::test_collect\": 1.5442301999999017,\r\n    \"tests/func/test_stage_load.py::test_collect_dir_recursive\": 0.5649249000000509,\r\n    \"tests/func/test_stage_load.py::test_collect_generated\": 0.24801749999994627,\r\n    \"tests/func/test_stage_load.py::test_collect_glob\": 0.9323076999999103,\r\n    \"tests/func/test_stage_load.py::test_collect_granular_collision_output_dir_stage_name\": 0.4615868999999293,\r\n    \"tests/func/test_stage_load.py::test_collect_granular_not_existing_stage_name\": 0.5574206000001141,\r\n    \"tests/func/test_stage_load.py::test_collect_granular_priority_on_collision\": 0.43749460000003637,\r\n    \"tests/func/test_stage_load.py::test_collect_granular_same_output_name_stage_name\": 0.4683784999999716,\r\n    \"tests/func/test_stage_load.py::test_collect_granular_with_deps\": 0.9040660999999091,\r\n    
\"tests/func/test_stage_load.py::test_collect_granular_with_no_target\": 0.8537042000000383,\r\n    \"tests/func/test_stage_load.py::test_collect_granular_with_not_existing_output_or_stage_name\": 0.12876589999984844,\r\n    \"tests/func/test_stage_load.py::test_collect_granular_with_target\": 0.9354548000000023,\r\n    \"tests/func/test_stage_load.py::test_collect_not_a_group_stage_with_group_flag\": 1.090573000000063,\r\n    \"tests/func/test_stage_load.py::test_collect_optimization\": 0.21226439999998092,\r\n    \"tests/func/test_stage_load.py::test_collect_optimization_on_stage_name\": 0.43537639999988187,\r\n    \"tests/func/test_stage_load.py::test_collect_repo_callback\": 0.24589080000009744,\r\n    \"tests/func/test_stage_load.py::test_collect_with_not_existing_dvcfile[not_existing.dvc:stage_name]\": 0.13259750000008808,\r\n    \"tests/func/test_stage_load.py::test_collect_with_not_existing_dvcfile[not_existing.dvc]\": 0.13957369999991442,\r\n    \"tests/func/test_stage_load.py::test_collect_with_not_existing_dvcfile[not_existing/dvc.yaml:stage_name]\": 0.1478920999999218,\r\n    \"tests/func/test_stage_load.py::test_collect_with_not_existing_dvcfile[not_existing/dvc.yaml]\": 0.12979939999991075,\r\n    \"tests/func/test_stage_load.py::test_collect_with_not_existing_output_or_stage_name\": 0.4114243000000215,\r\n    \"tests/func/test_stage_load.py::test_get_stage\": 0.34875819999990654,\r\n    \"tests/func/test_stage_load.py::test_get_stage_single_stage_dvcfile\": 0.2461819000000105,\r\n    \"tests/func/test_stage_load.py::test_get_stages\": 0.6215028000000302,\r\n    \"tests/func/test_stage_load.py::test_get_stages_old_dvcfile\": 0.21525209999992967,\r\n    \"tests/func/test_stage_load.py::test_gitignored_file_try_collect_granular_for_data_files\": 0.31906579999997575,\r\n    \"tests/func/test_stage_load.py::test_gitignored_file_try_collect_granular_for_dvc_yaml_files\": 1.5553847999999562,\r\n    \"tests/func/test_stage_load.py::test_stages\": 
0.414835900000071,\r\n    \"tests/func/test_state.py::test_get_unused_links\": 0.29348780000009356,\r\n    \"tests/func/test_state.py::test_remove_links\": 0.35063050000007934,\r\n    \"tests/func/test_state.py::test_state\": 0.34103749999985666,\r\n    \"tests/func/test_state.py::test_state_overflow\": 0.44685179999999036,\r\n    \"tests/func/test_status.py::test_implied_cloud\": 1.3159074000000146,\r\n    \"tests/func/test_status.py::test_params_without_targets\": 0.2553169999999909,\r\n    \"tests/func/test_status.py::test_quiet\": 0.3903937000000042,\r\n    \"tests/func/test_status.py::test_status_before_and_after_dvc_init\": 1.24051709999992,\r\n    \"tests/func/test_status.py::test_status_non_dvc_repo_import[False]\": 0.5673391999998785,\r\n    \"tests/func/test_status.py::test_status_non_dvc_repo_import[True]\": 0.6612268000000086,\r\n    \"tests/func/test_status.py::test_status_on_pipeline_stages\": 0.551217299999962,\r\n    \"tests/func/test_status.py::test_status_outputs\": 0.5847573000000921,\r\n    \"tests/func/test_status.py::test_status_recursive\": 0.2683987000000343,\r\n    \"tests/func/test_unprotect.py::test_unprotect\": 0.24031379999996716,\r\n    \"tests/func/test_update.py::test_update_before_and_after_dvc_init\": 1.652384100000063,\r\n    \"tests/func/test_update.py::test_update_from_subrepos[False]\": 1.5830997999999,\r\n    \"tests/func/test_update.py::test_update_from_subrepos[True]\": 2.272872600000028,\r\n    \"tests/func/test_update.py::test_update_import[False]\": 1.8188473000001295,\r\n    \"tests/func/test_update.py::test_update_import[True]\": 2.7979914999999664,\r\n    \"tests/func/test_update.py::test_update_import_after_remote_updates_to_dvc\": 1.569626900000003,\r\n    \"tests/func/test_update.py::test_update_import_to_remote\": 0.7015065000000504,\r\n    \"tests/func/test_update.py::test_update_import_url\": 0.39181920000009995,\r\n    \"tests/func/test_update.py::test_update_import_url_no_download[False]\": 
0.28972529999987273,\r\n    \"tests/func/test_update.py::test_update_import_url_no_download[True]\": 0.36471770000002834,\r\n    \"tests/func/test_update.py::test_update_import_url_to_remote\": 0.6202769999998736,\r\n    \"tests/func/test_update.py::test_update_import_url_to_remote_directory\": 0.7887408000001415,\r\n    \"tests/func/test_update.py::test_update_import_url_to_remote_directory_changed_contents\": 0.7646365000000515,\r\n    \"tests/func/test_update.py::test_update_import_url_to_remote_directory_same_hash\": 0.7358169999998836,\r\n    \"tests/func/test_update.py::test_update_import_url_unchanged\": 0.32728270000006887,\r\n    \"tests/func/test_update.py::test_update_no_download[False]\": 1.2106467999999495,\r\n    \"tests/func/test_update.py::test_update_no_download[True]\": 1.3192696999999498,\r\n    \"tests/func/test_update.py::test_update_recursive\": 1.6255898999997953,\r\n    \"tests/func/test_update.py::test_update_rev\": 1.0238027999998849,\r\n    \"tests/func/test_update.py::test_update_unchanged\": 1.2584471000000121,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[.-.-dvc.yaml:train]\": 0.43971429999999145,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[.-.-foo]\": 0.40643180000006396,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[.-.-train]\": 0.4276787000000013,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[.-sub-..\\\\\\\\dvc.yaml:train]\": 0.42697390000000723,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[.-sub-..\\\\\\\\foo]\": 0.4174022000000832,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[sub-.-sub\\\\\\\\dvc.yaml:train]\": 0.5342236999999841,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[sub-.-sub\\\\\\\\foo]\": 0.5391186000001653,\r\n    
\"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[sub-dir-..\\\\\\\\sub\\\\\\\\dvc.yaml:train]\": 0.44631729999991876,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[sub-dir-..\\\\\\\\sub\\\\\\\\foo]\": 0.40331539999999677,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[sub-sub-dvc.yaml:train]\": 0.4777887999999848,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[sub-sub-foo]\": 0.4485227999998642,\r\n    \"tests/func/test_used_objs.py::test_from_gitfs_when_pwd_not_in_root[sub-sub-train]\": 0.4451402000000826,\r\n    \"tests/func/test_used_objs.py::test_used_objs_push\": 0.3140799999999899,\r\n    \"tests/func/test_utils.py::test_boxify\": 0.0037649999999302963,\r\n    \"tests/func/test_utils.py::test_dict_md5\": 0.003655699999853823,\r\n    \"tests/func/test_utils.py::test_glob_no_match\": 0.004886600000077124,\r\n    \"tests/func/test_version.py::test_\": 1.3859016000001247,\r\n    \"tests/func/test_version.py::test_import_error\": 0.365707499999985,\r\n    \"tests/func/test_virtual_directory.py::test_partial_checkout_and_update\": 1.4939856999999392,\r\n    \"tests/func/test_virtual_directory.py::test_virtual_add\": 0.9724830999999767,\r\n    \"tests/func/test_virtual_directory.py::test_virtual_remove\": 0.9881833000000597,\r\n    \"tests/func/test_virtual_directory.py::test_virtual_update_dir\": 0.8301616999998487,\r\n    \"tests/func/test_virtual_directory.py::test_virtual_update_file\": 0.7492493000000877,\r\n    \"tests/func/test_virtual_directory.py::test_virtual_update_noop\": 0.902233600000045,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides0-expected0-json]\": 0.016757900000129666,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides0-expected0-toml]\": 0.013149600000019745,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides0-expected0-yaml]\": 0.026289499999961663,\r\n    
\"tests/func/utils/test_hydra.py::test_apply_overrides[overrides1-expected1-json]\": 0.01717210000003888,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides1-expected1-toml]\": 0.01372639999999592,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides1-expected1-yaml]\": 0.023308899999960886,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides10-expected10-json]\": 0.021389800000065406,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides10-expected10-toml]\": 0.021289899999942463,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides10-expected10-yaml]\": 0.029417499999908614,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides11-expected11-json]\": 0.016622700000084478,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides11-expected11-toml]\": 0.025770200000124532,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides11-expected11-yaml]\": 0.03114780000009887,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides2-expected2-json]\": 0.014930700000149955,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides2-expected2-toml]\": 0.011712699999975484,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides2-expected2-yaml]\": 0.023883599999976468,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides3-expected3-json]\": 0.021143299999948795,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides3-expected3-toml]\": 0.019983099999990372,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides3-expected3-yaml]\": 0.024119600000062746,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides4-expected4-json]\": 0.016237299999829702,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides4-expected4-toml]\": 0.03111649999993915,\r\n    
\"tests/func/utils/test_hydra.py::test_apply_overrides[overrides4-expected4-yaml]\": 0.02495239999984733,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides5-expected5-json]\": 0.020187100000043756,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides5-expected5-toml]\": 0.019237699999962388,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides5-expected5-yaml]\": 0.032607999999981985,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides6-expected6-json]\": 0.016924699999890436,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides6-expected6-toml]\": 0.04386929999986933,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides6-expected6-yaml]\": 0.024652899999978217,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides7-expected7-json]\": 0.015520899999955873,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides7-expected7-toml]\": 0.012692199999946752,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides7-expected7-yaml]\": 0.030214099999966493,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides8-expected8-json]\": 0.021859500000005028,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides8-expected8-toml]\": 0.020292899999958536,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides8-expected8-yaml]\": 0.033927000000062435,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides9-expected9-json]\": 0.016962000000035005,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides9-expected9-toml]\": 0.02381449999995766,\r\n    \"tests/func/utils/test_hydra.py::test_apply_overrides[overrides9-expected9-yaml]\": 0.025491699999975026,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_dir_module[None-None-None-error_context3]\": 0.013406300000042393,\r\n    
\"tests/func/utils/test_hydra.py::test_compose_and_dump_dir_module[None-hydra.test_utils.configs-config_content1-error_context1]\": 0.1134221999999454,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_dir_module[conf-None-config_content0-error_context0]\": 0.11507209999990664,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_dir_module[conf-hydra.test_utils.configs-config_content2-error_context2]\": 0.11261070000000473,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides0-expected0-json]\": 0.1281308000000081,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides0-expected0-toml]\": 0.1236941999999317,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides0-expected0-yaml]\": 0.12669659999994565,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides1-expected1-json]\": 0.12357580000002599,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides1-expected1-toml]\": 0.17739260000007562,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides1-expected1-yaml]\": 0.12748369999997067,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides2-expected2-json]\": 0.1338885999999775,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides2-expected2-toml]\": 0.1365699999998924,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_overrides[overrides2-expected2-yaml]\": 0.1324849999999742,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_plugins\": 0.13925589999996646,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_resolves_interpolation\": 0.1168084999999337,\r\n    \"tests/func/utils/test_hydra.py::test_compose_and_dump_yaml_handles_string\": 0.10991609999996399,\r\n    \"tests/func/utils/test_hydra.py::test_hydra_sweeps[overrides0-expected0]\": 0.006874799999877723,\r\n    
\"tests/func/utils/test_hydra.py::test_hydra_sweeps[overrides1-expected1]\": 0.006828100000120685,\r\n    \"tests/func/utils/test_hydra.py::test_hydra_sweeps[overrides2-expected2]\": 0.005153599999971448,\r\n    \"tests/func/utils/test_hydra.py::test_hydra_sweeps[overrides3-expected3]\": 0.010010100000044986,\r\n    \"tests/func/utils/test_hydra.py::test_hydra_sweeps[overrides4-expected4]\": 0.005206800000109979,\r\n    \"tests/func/utils/test_hydra.py::test_hydra_sweeps[overrides5-expected5]\": 0.005195799999910378,\r\n    \"tests/func/utils/test_hydra.py::test_hydra_sweeps[overrides6-expected6]\": 0.0055651999999781765,\r\n    \"tests/func/utils/test_hydra.py::test_invalid_overrides[overrides0]\": 0.025969000000031883,\r\n    \"tests/func/utils/test_hydra.py::test_invalid_overrides[overrides1]\": 0.026963600000044607,\r\n    \"tests/func/utils/test_hydra.py::test_invalid_overrides[overrides2]\": 0.02018349999991642,\r\n    \"tests/func/utils/test_hydra.py::test_invalid_overrides[overrides3]\": 0.021663399999965804,\r\n    \"tests/func/utils/test_hydra.py::test_invalid_sweep\": 0.00546810000014375,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[additional_key_on_outs]\": 0.243435399999953,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[deps_as_dict]\": 0.2002921000000697,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[duplicate_keys]\": 0.2018291000000545,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[empty_stage]\": 0.19218150000006062,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[foreach_do_do_null]\": 0.1954337999999325,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[foreach_do_missing_cmd]\": 0.2215969999999743,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[foreach_scalar]\": 0.2090827999999192,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[foreach_unknown_cmd_missing_do]\": 0.19983810000007907,\r\n    
\"tests/func/utils/test_strict_yaml.py::test_exceptions[mapping_values_not_allowed]\": 0.21080290000008972,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[merge_conflicts]\": 0.18337130000008983,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[missing_cmd]\": 0.19634989999997288,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[no_hyphen_block]\": 0.19842909999999847,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[not_a_dict]\": 0.18917439999995622,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[null_value_on_outs]\": 0.19282620000001316,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[outs_as_str]\": 0.17304079999996702,\r\n    \"tests/func/utils/test_strict_yaml.py::test_exceptions[unclosed_scalar]\": 0.19768529999998918,\r\n    \"tests/func/utils/test_strict_yaml.py::test_fallback_exception_message\": 0.26093439999988277,\r\n    \"tests/func/utils/test_strict_yaml.py::test_make_relpath\": 0.014381800000023759,\r\n    \"tests/func/utils/test_strict_yaml.py::test_on_revision[stages:\\\\n  stage1:\\\\n    cmd: python train.py\\\\n    cmd: python train.py\\\\n-'./dvc.yaml' is invalid in revision '{short_rev}'.]\": 0.5581684999998515,\r\n    \"tests/func/utils/test_strict_yaml.py::test_on_revision[stages:\\\\n  stage1:\\\\n    cmd: {}\\\\n-'./dvc.yaml' validation failed in revision '{short_rev}'.]\": 0.5184321000000409,\r\n    \"tests/integration/plots/test_plots.py::test_config_output_dir\": 0.29116190000002007,\r\n    \"tests/integration/plots/test_plots.py::test_nested_x_defn_collection\": 0.5174432999999681,\r\n    \"tests/integration/plots/test_plots.py::test_no_plots\": 0.2940975999999864,\r\n    \"tests/integration/plots/test_plots.py::test_plots_empty_directory\": 0.5595962999999529,\r\n    \"tests/integration/plots/test_plots.py::test_repo_with_config_plots\": 2.7835139000000026,\r\n    \"tests/integration/plots/test_plots.py::test_repo_with_dvclive_plots\": 
0.7092543999999634,\r\n    \"tests/integration/plots/test_plots.py::test_repo_with_plots\": 3.33253650000006,\r\n    \"tests/integration/plots/test_plots.py::test_repo_with_removed_plots\": 1.9890381000001298,\r\n    \"tests/integration/plots/test_repo_plots_api.py::test_api\": 2.918742299999849,\r\n    \"tests/integration/plots/test_repo_plots_api.py::test_api_with_config_plots\": 2.3034652999999707,\r\n    \"tests/integration/test_studio_live_experiments.py::test_monorepo_relpath\": 0.849361300000055,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[DVC_EXP_GIT_REMOTE-False-False]\": 1.1246794999999565,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[DVC_EXP_GIT_REMOTE-False-True]\": 1.7383136000000832,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[DVC_EXP_GIT_REMOTE-True-False]\": 1.0295220999998946,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[DVC_EXP_GIT_REMOTE-True-True]\": 1.9327763000001141,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[None-False-False]\": 1.1464447999999265,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[None-False-True]\": 1.6287972000000082,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[None-True-False]\": 1.0905978000000687,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio[None-True-True]\": 1.934522399999878,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio_subdir[False]\": 1.410834799999975,\r\n    \"tests/integration/test_studio_live_experiments.py::test_post_to_studio_subdir[True]\": 1.9778683000000683,\r\n    \"tests/integration/test_studio_live_experiments.py::test_virtual_monorepo_relpath\": 0.9138851999998678,\r\n    \"tests/unit/cli/test_main.py::test_ignore_in_collected_dir_error_is_logged\": 0.017126100000041333,\r\n    
\"tests/unit/cli/test_main.py::test_remote_missing_deps_are_correctly_reported[None-Please report this bug to]\": 0.014922700000056466,\r\n    \"tests/unit/cli/test_main.py::test_remote_missing_deps_are_correctly_reported[conda-conda install -c conda-forge dvc-proto]\": 0.015353699999991477,\r\n    \"tests/unit/cli/test_main.py::test_remote_missing_deps_are_correctly_reported[pip-pip install 'dvc[proto]']\": 0.013537000000042099,\r\n    \"tests/unit/cli/test_main.py::test_state_pickle_errors_are_correctly_raised\": 0.0165122999999312,\r\n    \"tests/unit/command/ls/test_ls.py::test_list\": 0.006342400000107773,\r\n    \"tests/unit/command/ls/test_ls.py::test_list_alias\": 0.0035484999999653155,\r\n    \"tests/unit/command/ls/test_ls.py::test_list_config\": 0.005209800000216092,\r\n    \"tests/unit/command/ls/test_ls.py::test_list_git_ssh_rev\": 0.00511940000001232,\r\n    \"tests/unit/command/ls/test_ls.py::test_list_outputs_only\": 0.006585399999949004,\r\n    \"tests/unit/command/ls/test_ls.py::test_list_recursive\": 0.005100900000002184,\r\n    \"tests/unit/command/ls/test_ls.py::test_list_targets\": 0.005044999999995525,\r\n    \"tests/unit/command/ls/test_ls.py::test_show_colors\": 0.004921199999898818,\r\n    \"tests/unit/command/ls/test_ls.py::test_show_json\": 0.004522199999883014,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_dir\": 0.003349100000036742,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_exec\": 0.003280899999936082,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_ext\": 0.003362700000025143,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_file\": 0.003305399999931069,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_out_dir\": 0.003481700000065757,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_out_exec\": 0.0033405999998876723,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_out_ext\": 0.0033998999998630097,\r\n    
\"tests/unit/command/ls/test_ls_colors.py::test_ls_colors_out_file\": 0.0033174999999801003,\r\n    \"tests/unit/command/ls/test_ls_colors.py::test_ls_repo_with_custom_color_env_defined\": 0.0035155000001623193,\r\n    \"tests/unit/command/test_add.py::test_add\": 0.20737009999982092,\r\n    \"tests/unit/command/test_add.py::test_add_to_cache_invalid_combinations\": 0.2307676000000356,\r\n    \"tests/unit/command/test_add.py::test_add_to_remote\": 0.23873700000001463,\r\n    \"tests/unit/command/test_add.py::test_add_to_remote_invalid_combinations\": 0.21601970000006077,\r\n    \"tests/unit/command/test_cache.py::test_cache_dir_local\": 0.16169510000008813,\r\n    \"tests/unit/command/test_checkout.py::test_checkout\": 0.2004144999999653,\r\n    \"tests/unit/command/test_checkout.py::test_log_changes\": 0.007630400000039117,\r\n    \"tests/unit/command/test_compat_flag.py::test_backward_compat_flags[exp-list-name_only]\": 0.0038943000000699612,\r\n    \"tests/unit/command/test_compat_flag.py::test_backward_compat_flags[stage-list-name_only]\": 0.0037500000000818545,\r\n    \"tests/unit/command/test_completion.py::test_completion[bash]\": 0.01732660000004671,\r\n    \"tests/unit/command/test_completion.py::test_completion[zsh]\": 0.04076810000003661,\r\n    \"tests/unit/command/test_config.py::test_config_bad_name[no_option]\": 0.0055750999999872874,\r\n    \"tests/unit/command/test_config.py::test_config_bad_name[remote.way.too.long]\": 0.004850099999998747,\r\n    \"tests/unit/command/test_config.py::test_config_bad_name[way.too.long]\": 0.005234700000073644,\r\n    \"tests/unit/command/test_config.py::test_config_formatter\": 0.0034963999999035877,\r\n    \"tests/unit/command/test_dag.py::test_build\": 0.41772169999990183,\r\n    \"tests/unit/command/test_dag.py::test_build_full\": 0.43086940000011964,\r\n    \"tests/unit/command/test_dag.py::test_build_full_outs[False]\": 0.4505840999999009,\r\n    \"tests/unit/command/test_dag.py::test_build_full_outs[True]\": 
0.4292612999998937,\r\n    \"tests/unit/command/test_dag.py::test_build_granular_target_with_outs\": 0.4434605000001284,\r\n    \"tests/unit/command/test_dag.py::test_build_target\": 0.4442584999999326,\r\n    \"tests/unit/command/test_dag.py::test_build_target_with_outs\": 0.4607635999998365,\r\n    \"tests/unit/command/test_dag.py::test_dag[--dot-_show_dot]\": 0.2584747000000789,\r\n    \"tests/unit/command/test_dag.py::test_dag[--md-_show_mermaid]\": 0.2575375000001259,\r\n    \"tests/unit/command/test_dag.py::test_dag[--mermaid-_show_mermaid]\": 0.2538787000000866,\r\n    \"tests/unit/command/test_dag.py::test_dag[None-_show_ascii]\": 0.2701758000000609,\r\n    \"tests/unit/command/test_dag.py::test_show_ascii\": 0.47154680000005555,\r\n    \"tests/unit/command/test_dag.py::test_show_dot\": 0.49623429999996915,\r\n    \"tests/unit/command/test_dag.py::test_show_dot_properly_escapes\": 0.005304700000010598,\r\n    \"tests/unit/command/test_dag.py::test_show_mermaid\": 0.506922099999997,\r\n    \"tests/unit/command/test_dag.py::test_show_mermaid_markdown\": 0.46951670000021295,\r\n    \"tests/unit/command/test_data_status.py::test_cli\": 0.15436669999996866,\r\n    \"tests/unit/command/test_data_status.py::test_empty_scm_repo\": 0.44124859999988075,\r\n    \"tests/unit/command/test_data_status.py::test_json[args0-to_omit0]\": 0.19605909999984306,\r\n    \"tests/unit/command/test_data_status.py::test_json[args1-to_omit1]\": 0.18398960000013176,\r\n    \"tests/unit/command/test_data_status.py::test_json[args2-to_omit2]\": 0.205250799999817,\r\n    \"tests/unit/command/test_data_status.py::test_no_changes_repo\": 0.30644970000003013,\r\n    \"tests/unit/command/test_data_status.py::test_show_status[False-args0]\": 0.2457772999999861,\r\n    \"tests/unit/command/test_data_status.py::test_show_status[False-args1]\": 0.26650870000003124,\r\n    \"tests/unit/command/test_data_status.py::test_show_status[False-args2]\": 0.2271581000001106,\r\n    
\"tests/unit/command/test_data_status.py::test_show_status[True-args0]\": 0.27466419999996106,\r\n    \"tests/unit/command/test_data_status.py::test_show_status[True-args1]\": 0.2749612000001207,\r\n    \"tests/unit/command/test_data_status.py::test_show_status[True-args2]\": 0.25732209999978295,\r\n    \"tests/unit/command/test_data_sync.py::test_fetch\": 0.17609809999987647,\r\n    \"tests/unit/command/test_data_sync.py::test_pull\": 0.17549940000003517,\r\n    \"tests/unit/command/test_data_sync.py::test_push\": 0.16576090000000931,\r\n    \"tests/unit/command/test_dataset.py::test_add[spec0-lock0-Adding ds (url:/path @ main)\\\\n]\": 0.22054290000005494,\r\n    \"tests/unit/command/test_dataset.py::test_add[spec1-lock1-Adding mydataset (dc://dataset @ v1)\\\\n]\": 0.22979979999990974,\r\n    \"tests/unit/command/test_dataset.py::test_add[spec2-lock2-Adding mydataset (s3://bucket/path)\\\\n]\": 0.20264819999988504,\r\n    \"tests/unit/command/test_dataset.py::test_add_already_exists\": 0.2456899000001158,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec0-old_lock0-new_lock0-expected_outputs0-missing]\": 0.2354315999998562,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec0-old_lock0-new_lock0-expected_outputs0-unchanged]\": 0.23527079999996658,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec0-old_lock0-new_lock0-expected_outputs0-updated]\": 0.21899649999988924,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec1-old_lock1-new_lock1-expected_outputs1-missing]\": 0.18976390000011634,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec1-old_lock1-new_lock1-expected_outputs1-unchanged]\": 0.18847329999994145,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec1-old_lock1-new_lock1-expected_outputs1-updated]\": 0.17528420000007827,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec2-old_lock2-new_lock2-expected_outputs2-missing]\": 0.1846043999998983,\r\n    
\"tests/unit/command/test_dataset.py::test_update[spec2-old_lock2-new_lock2-expected_outputs2-unchanged]\": 0.2048106000000871,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec2-old_lock2-new_lock2-expected_outputs2-updated]\": 0.18596199999990404,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec3-old_lock3-new_lock3-expected_outputs3-missing]\": 0.17655880000006619,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec3-old_lock3-new_lock3-expected_outputs3-unchanged]\": 0.30956070000002,\r\n    \"tests/unit/command/test_dataset.py::test_update[spec3-old_lock3-new_lock3-expected_outputs3-updated]\": 0.17832229999987703,\r\n    \"tests/unit/command/test_diff.py::test_default\": 0.157224000000042,\r\n    \"tests/unit/command/test_diff.py::test_diff_show_markdown_and_hash[False]\": 0.16265659999999116,\r\n    \"tests/unit/command/test_diff.py::test_diff_show_markdown_and_hash[None]\": 0.21256189999996877,\r\n    \"tests/unit/command/test_diff.py::test_diff_show_markdown_and_hash[True]\": 0.15831829999990532,\r\n    \"tests/unit/command/test_diff.py::test_digest[dict]\": 0.0036724000000276646,\r\n    \"tests/unit/command/test_diff.py::test_digest[str]\": 0.0036453000000165048,\r\n    \"tests/unit/command/test_diff.py::test_hide_missing\": 0.2271087000000307,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show0--opts0]\": 0.20476959999996325,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show0--opts1]\": 0.1829457999999704,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show0--opts2]\": 0.15757240000004913,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show0--opts3]\": 0.21165159999998195,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show1-{}-opts0]\": 0.17485999999996693,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show1-{}-opts1]\": 0.16453910000007,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show1-{}-opts2]\": 0.2937568000000965,\r\n    
\"tests/unit/command/test_diff.py::test_no_changes[show1-{}-opts3]\": 0.20795019999991382,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show2-| Status   | Path   |\\\\n|----------|--------|-opts0]\": 0.2065845999998146,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show2-| Status   | Path   |\\\\n|----------|--------|-opts1]\": 0.26602109999987533,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show2-| Status   | Path   |\\\\n|----------|--------|-opts2]\": 0.1966783000000305,\r\n    \"tests/unit/command/test_diff.py::test_no_changes[show2-| Status   | Path   |\\\\n|----------|--------|-opts3]\": 0.2248160999997708,\r\n    \"tests/unit/command/test_diff.py::test_show_hash\": 0.18903350000005048,\r\n    \"tests/unit/command/test_diff.py::test_show_json\": 0.1852141000000529,\r\n    \"tests/unit/command/test_diff.py::test_show_json_and_hash\": 0.16317690000005314,\r\n    \"tests/unit/command/test_diff.py::test_show_json_hide_missing\": 0.1684483999999884,\r\n    \"tests/unit/command/test_diff.py::test_show_markdown\": 0.004335099999934755,\r\n    \"tests/unit/command/test_diff.py::test_show_markdown_hide_missing\": 0.004313600000045881,\r\n    \"tests/unit/command/test_diff.py::test_show_markdown_with_hash\": 0.004763400000115325,\r\n    \"tests/unit/command/test_du.py::test_du\": 0.004861399999867899,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_apply\": 0.2363333999999213,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_branch\": 0.2514846999999918,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_clean\": 0.21406049999995957,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_diff\": 0.2540403000000424,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_diff_revs\": 0.194408400000043,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_list\": 0.2429000999999289,\r\n    
\"tests/unit/command/test_experiments.py::test_experiments_list_format[args0-master:\\\\n\\\\tsha-a [exp-a]\\\\n]\": 0.2363052000001744,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_list_format[args1-exp-a\\\\n]\": 0.23047929999984262,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_list_format[args2-sha-a\\\\n]\": 0.24126819999992222,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_list_remote\": 0.22307360000013432,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_pull\": 0.23942060000001675,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_push\": 0.25641060000009475,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_remove_flag\": 0.24051600000007056,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_remove_invalid\": 0.27601579999998194,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_remove_special\": 0.378145199999949,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_rename_flag\": 0.2838232000000289,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_rename_invalid\": 0.3273948000000928,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_run\": 0.26182230000006257,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_run_message[--message]\": 0.25680639999995947,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_run_message[-M]\": 0.2054465999999593,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_run_message[-m]\": 0.22659100000009857,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_save\": 0.34348650000004,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_save_message[--message]\": 0.2114745999999741,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_save_message[-M]\": 0.26990299999988565,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_save_message[-m]\": 
0.24079059999996844,\r\n    \"tests/unit/command/test_experiments.py::test_experiments_show\": 0.2314608000001499,\r\n    \"tests/unit/command/test_gc.py::test_\": 0.31430200000011155,\r\n    \"tests/unit/command/test_get.py::test_get\": 0.004811700000004748,\r\n    \"tests/unit/command/test_get.py::test_get_url\": 0.004623800000103984,\r\n    \"tests/unit/command/test_get_url.py::test_get_url\": 0.0063950999999633495,\r\n    \"tests/unit/command/test_git_hook.py::test_out_of_repo[post-checkout-CmdPostCheckout]\": 0.01685439999994287,\r\n    \"tests/unit/command/test_git_hook.py::test_out_of_repo[pre-commit-CmdPreCommit]\": 0.019848099999990154,\r\n    \"tests/unit/command/test_git_hook.py::test_out_of_repo[pre-push-CmdPrePush]\": 0.016037099999948623,\r\n    \"tests/unit/command/test_help.py::test_help[add]\": 0.00516990000005535,\r\n    \"tests/unit/command/test_help.py::test_help[artifacts-get]\": 0.005396499999960724,\r\n    \"tests/unit/command/test_help.py::test_help[artifacts]\": 0.004762099999993552,\r\n    \"tests/unit/command/test_help.py::test_help[cache-dir]\": 0.005094600000120408,\r\n    \"tests/unit/command/test_help.py::test_help[cache-migrate]\": 0.004777600000011262,\r\n    \"tests/unit/command/test_help.py::test_help[cache]\": 0.004717299999924762,\r\n    \"tests/unit/command/test_help.py::test_help[check-ignore]\": 0.004871100000173101,\r\n    \"tests/unit/command/test_help.py::test_help[checkout]\": 0.005995600000005652,\r\n    \"tests/unit/command/test_help.py::test_help[commit]\": 0.005087700000103723,\r\n    \"tests/unit/command/test_help.py::test_help[completion]\": 0.004948499999954947,\r\n    \"tests/unit/command/test_help.py::test_help[config]\": 0.0050775999999359556,\r\n    \"tests/unit/command/test_help.py::test_help[daemon-analytics]\": 0.005583000000115135,\r\n    \"tests/unit/command/test_help.py::test_help[daemon-updater]\": 0.004609500000015032,\r\n    \"tests/unit/command/test_help.py::test_help[dag]\": 0.004961499999922125,\r\n 
   \"tests/unit/command/test_help.py::test_help[data-status]\": 0.004912400000080197,\r\n    \"tests/unit/command/test_help.py::test_help[dataset-add]\": 0.004888800000003357,\r\n    \"tests/unit/command/test_help.py::test_help[dataset-update]\": 0.004693000000088432,\r\n    \"tests/unit/command/test_help.py::test_help[destroy]\": 0.004675899999938338,\r\n    \"tests/unit/command/test_help.py::test_help[diff]\": 0.005075299999930394,\r\n    \"tests/unit/command/test_help.py::test_help[doctor]\": 0.004543700000112949,\r\n    \"tests/unit/command/test_help.py::test_help[ds-add]\": 0.00488899999993464,\r\n    \"tests/unit/command/test_help.py::test_help[ds-update]\": 0.0046986999998352985,\r\n    \"tests/unit/command/test_help.py::test_help[du]\": 0.0050435999999081105,\r\n    \"tests/unit/command/test_help.py::test_help[dvc]\": 0.005414900000005218,\r\n    \"tests/unit/command/test_help.py::test_help[exp-apply]\": 0.004998699999987366,\r\n    \"tests/unit/command/test_help.py::test_help[exp-branch]\": 0.004858499999954802,\r\n    \"tests/unit/command/test_help.py::test_help[exp-clean]\": 0.005622000000016669,\r\n    \"tests/unit/command/test_help.py::test_help[exp-diff]\": 0.005296399999906498,\r\n    \"tests/unit/command/test_help.py::test_help[exp-list]\": 0.005151499999897169,\r\n    \"tests/unit/command/test_help.py::test_help[exp-ls]\": 0.005351799999971263,\r\n    \"tests/unit/command/test_help.py::test_help[exp-pull]\": 0.005486500000074557,\r\n    \"tests/unit/command/test_help.py::test_help[exp-push]\": 0.005553100000156519,\r\n    \"tests/unit/command/test_help.py::test_help[exp-remove]\": 0.00527179999994587,\r\n    \"tests/unit/command/test_help.py::test_help[exp-rename]\": 0.004908799999839175,\r\n    \"tests/unit/command/test_help.py::test_help[exp-rm]\": 0.005404200000157289,\r\n    \"tests/unit/command/test_help.py::test_help[exp-run]\": 0.00624249999998483,\r\n    \"tests/unit/command/test_help.py::test_help[exp-save]\": 0.0052524000000175874,\r\n    
\"tests/unit/command/test_help.py::test_help[exp-show]\": 0.006198300000050949,\r\n    \"tests/unit/command/test_help.py::test_help[exp]\": 0.005255499999861968,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-apply]\": 0.004880999999954838,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-branch]\": 0.004828900000006797,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-clean]\": 0.005543899999906898,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-diff]\": 0.0051723999998785075,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-list]\": 0.005507799999804774,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-ls]\": 0.005242700000053446,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-pull]\": 0.005516199999988203,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-push]\": 0.005440700000008292,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-remove]\": 0.005233699999962482,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-rename]\": 0.005135099999961312,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-rm]\": 0.0052400999999235864,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-run]\": 0.00627030000009654,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-save]\": 0.005168700000012905,\r\n    \"tests/unit/command/test_help.py::test_help[experiments-show]\": 0.006394000000000233,\r\n    \"tests/unit/command/test_help.py::test_help[experiments]\": 0.005340799999885348,\r\n    \"tests/unit/command/test_help.py::test_help[fetch]\": 0.0055118999999876905,\r\n    \"tests/unit/command/test_help.py::test_help[freeze]\": 0.004637200000161101,\r\n    \"tests/unit/command/test_help.py::test_help[gc]\": 0.00605360000008659,\r\n    \"tests/unit/command/test_help.py::test_help[get-url]\": 0.005082600000037019,\r\n    \"tests/unit/command/test_help.py::test_help[get]\": 
0.006310499999926833,\r\n    \"tests/unit/command/test_help.py::test_help[git-hook-merge-driver]\": 0.004797400000029484,\r\n    \"tests/unit/command/test_help.py::test_help[git-hook-post-checkout]\": 0.004774100000076942,\r\n    \"tests/unit/command/test_help.py::test_help[git-hook-pre-commit]\": 0.004705299999955059,\r\n    \"tests/unit/command/test_help.py::test_help[git-hook-pre-push]\": 0.004717099999993479,\r\n    \"tests/unit/command/test_help.py::test_help[import-db]\": 0.004886699999929078,\r\n    \"tests/unit/command/test_help.py::test_help[import-url]\": 0.005191599999989194,\r\n    \"tests/unit/command/test_help.py::test_help[import]\": 0.004990700000007564,\r\n    \"tests/unit/command/test_help.py::test_help[init]\": 0.004769599999917773,\r\n    \"tests/unit/command/test_help.py::test_help[install]\": 0.004509999999982028,\r\n    \"tests/unit/command/test_help.py::test_help[list-url]\": 0.005079899999941517,\r\n    \"tests/unit/command/test_help.py::test_help[list]\": 0.005139899999903719,\r\n    \"tests/unit/command/test_help.py::test_help[ls-url]\": 0.0051078000000188695,\r\n    \"tests/unit/command/test_help.py::test_help[ls]\": 0.005072499999982938,\r\n    \"tests/unit/command/test_help.py::test_help[metrics-diff]\": 0.006496099999935723,\r\n    \"tests/unit/command/test_help.py::test_help[metrics-show]\": 0.005230500000038774,\r\n    \"tests/unit/command/test_help.py::test_help[metrics]\": 0.004968199999893841,\r\n    \"tests/unit/command/test_help.py::test_help[move]\": 0.004605499999911444,\r\n    \"tests/unit/command/test_help.py::test_help[mv]\": 0.004743300000086492,\r\n    \"tests/unit/command/test_help.py::test_help[params-diff]\": 0.00525409999988824,\r\n    \"tests/unit/command/test_help.py::test_help[params]\": 0.004771499999947082,\r\n    \"tests/unit/command/test_help.py::test_help[plots-diff]\": 0.005646700000056626,\r\n    \"tests/unit/command/test_help.py::test_help[plots-modify]\": 0.0052711999998109604,\r\n    
\"tests/unit/command/test_help.py::test_help[plots-show]\": 0.005484400000000278,\r\n    \"tests/unit/command/test_help.py::test_help[plots-templates]\": 0.0047318000000586835,\r\n    \"tests/unit/command/test_help.py::test_help[plots]\": 0.0047816000001148495,\r\n    \"tests/unit/command/test_help.py::test_help[pull]\": 0.005406800000059775,\r\n    \"tests/unit/command/test_help.py::test_help[push]\": 0.006573299999899973,\r\n    \"tests/unit/command/test_help.py::test_help[queue-kill]\": 0.004956300000003466,\r\n    \"tests/unit/command/test_help.py::test_help[queue-logs]\": 0.00587479999990137,\r\n    \"tests/unit/command/test_help.py::test_help[queue-remove]\": 0.0048679000000220185,\r\n    \"tests/unit/command/test_help.py::test_help[queue-start]\": 0.00466180000012173,\r\n    \"tests/unit/command/test_help.py::test_help[queue-status]\": 0.004603199999792196,\r\n    \"tests/unit/command/test_help.py::test_help[queue-stop]\": 0.004899899999941226,\r\n    \"tests/unit/command/test_help.py::test_help[queue]\": 0.005185999999980595,\r\n    \"tests/unit/command/test_help.py::test_help[remote-add]\": 0.005126499999960288,\r\n    \"tests/unit/command/test_help.py::test_help[remote-default]\": 0.004988600000046972,\r\n    \"tests/unit/command/test_help.py::test_help[remote-list]\": 0.0049685999999837804,\r\n    \"tests/unit/command/test_help.py::test_help[remote-modify]\": 0.005088500000056229,\r\n    \"tests/unit/command/test_help.py::test_help[remote-remove]\": 0.004826299999990624,\r\n    \"tests/unit/command/test_help.py::test_help[remote-rename]\": 0.005236899999999878,\r\n    \"tests/unit/command/test_help.py::test_help[remote]\": 0.004741699999954108,\r\n    \"tests/unit/command/test_help.py::test_help[remove]\": 0.004911599999900318,\r\n    \"tests/unit/command/test_help.py::test_help[repro]\": 0.005877499999996871,\r\n    \"tests/unit/command/test_help.py::test_help[rm]\": 0.004688199999918652,\r\n    \"tests/unit/command/test_help.py::test_help[root]\": 
0.00459539999997105,\r\n    \"tests/unit/command/test_help.py::test_help[stage-add]\": 0.005957599999987906,\r\n    \"tests/unit/command/test_help.py::test_help[stage-list]\": 0.004868999999985135,\r\n    \"tests/unit/command/test_help.py::test_help[stage]\": 0.005666300000143565,\r\n    \"tests/unit/command/test_help.py::test_help[status]\": 0.005589900000018133,\r\n    \"tests/unit/command/test_help.py::test_help[studio-login]\": 0.005103100000155791,\r\n    \"tests/unit/command/test_help.py::test_help[studio-logout]\": 0.004475100000036036,\r\n    \"tests/unit/command/test_help.py::test_help[studio-token]\": 0.004713700000024801,\r\n    \"tests/unit/command/test_help.py::test_help[studio]\": 0.004740799999922274,\r\n    \"tests/unit/command/test_help.py::test_help[unfreeze]\": 0.0046703999998953805,\r\n    \"tests/unit/command/test_help.py::test_help[unprotect]\": 0.0046142999999574386,\r\n    \"tests/unit/command/test_help.py::test_help[update]\": 0.0050053999999590815,\r\n    \"tests/unit/command/test_help.py::test_help[version]\": 0.004662299999949937,\r\n    \"tests/unit/command/test_imp.py::test_import\": 0.15950039999984256,\r\n    \"tests/unit/command/test_imp.py::test_import_no_download\": 0.17638509999983398,\r\n    \"tests/unit/command/test_imp.py::test_import_no_exec\": 0.16130900000018755,\r\n    \"tests/unit/command/test_imp_url.py::test_failed_import_url\": 0.24874369999997725,\r\n    \"tests/unit/command/test_imp_url.py::test_import_url\": 0.16240170000003218,\r\n    \"tests/unit/command/test_imp_url.py::test_import_url_no_exec_download_flags[--no-download-expected1]\": 0.1616487999998526,\r\n    \"tests/unit/command/test_imp_url.py::test_import_url_no_exec_download_flags[--no-exec-expected0]\": 0.15735030000007555,\r\n    \"tests/unit/command/test_imp_url.py::test_import_url_to_remote\": 0.23831790000008368,\r\n    \"tests/unit/command/test_imp_url.py::test_import_url_to_remote_flag\": 0.2676136000000042,\r\n    
\"tests/unit/command/test_imp_url.py::test_import_url_to_remote_invalid_combination[--no-download]\": 0.23121040000000903,\r\n    \"tests/unit/command/test_imp_url.py::test_import_url_to_remote_invalid_combination[--no-exec]\": 0.22697579999999107,\r\n    \"tests/unit/command/test_imp_url.py::test_import_url_to_remote_invalid_combination[--version-aware]\": 0.2488975000001119,\r\n    \"tests/unit/command/test_ls_url.py::test_ls_url\": 0.007741799999962495,\r\n    \"tests/unit/command/test_ls_url.py::test_recursive\": 0.007042000000069493,\r\n    \"tests/unit/command/test_metrics.py::test_metrics_diff\": 0.17783269999983986,\r\n    \"tests/unit/command/test_metrics.py::test_metrics_diff_json\": 0.16102879999982633,\r\n    \"tests/unit/command/test_metrics.py::test_metrics_show\": 0.15935780000006616,\r\n    \"tests/unit/command/test_metrics.py::test_metrics_show_json\": 0.19421349999993254,\r\n    \"tests/unit/command/test_params.py::test_params_diff\": 0.1581121000000394,\r\n    \"tests/unit/command/test_params.py::test_params_diff_from_cli\": 0.1667267999998785,\r\n    \"tests/unit/command/test_params.py::test_params_diff_show_json\": 0.22336029999985385,\r\n    \"tests/unit/command/test_plots.py::test_plots_diff\": 0.16223200000001725,\r\n    \"tests/unit/command/test_plots.py::test_plots_diff_json\": 0.8370906000001241,\r\n    \"tests/unit/command/test_plots.py::test_plots_diff_open[False]\": 0.20436099999983526,\r\n    \"tests/unit/command/test_plots.py::test_plots_diff_open[True]\": 0.16055760000006103,\r\n    \"tests/unit/command/test_plots.py::test_plots_diff_open_failed\": 0.18257810000011432,\r\n    \"tests/unit/command/test_plots.py::test_plots_diff_open_wsl\": 0.16334660000006807,\r\n    \"tests/unit/command/test_plots.py::test_plots_diff_vega\": 0.21606650000001082,\r\n    \"tests/unit/command/test_plots.py::test_plots_path_is_quoted_and_resolved_properly[quote]\": 0.22137190000012197,\r\n    
\"tests/unit/command/test_plots.py::test_plots_path_is_quoted_and_resolved_properly[resolve]\": 0.16760520000002543,\r\n    \"tests/unit/command/test_plots.py::test_plots_show_vega\": 0.20280970000021625,\r\n    \"tests/unit/command/test_plots.py::test_plots_templates[None-t1\\\\nt2-0]\": 0.16750550000006115,\r\n    \"tests/unit/command/test_plots.py::test_plots_templates[t1-\\\"{'t1'}\\\"-0]\": 0.14744550000011714,\r\n    \"tests/unit/command/test_plots.py::test_plots_templates[t3--1]\": 0.19756789999996727,\r\n    \"tests/unit/command/test_plots.py::test_should_call_render[None]\": 0.02213379999989229,\r\n    \"tests/unit/command/test_plots.py::test_should_call_render[some_out]\": 0.02298040000016499,\r\n    \"tests/unit/command/test_plots.py::test_should_call_render[to\\\\\\\\subdir]\": 0.021559099999990394,\r\n    \"tests/unit/command/test_plots.py::test_should_pass_template_dir\": 0.20337180000001354,\r\n    \"tests/unit/command/test_plots.py::test_show_json[False]\": 0.004578400000013971,\r\n    \"tests/unit/command/test_plots.py::test_show_json[True]\": 0.004965399999946385,\r\n    \"tests/unit/command/test_plots.py::test_show_json_no_renderers\": 0.0037462000000232365,\r\n    \"tests/unit/command/test_plots.py::test_show_json_with_error\": 0.18920730000002095,\r\n    \"tests/unit/command/test_queue.py::test_experiments_kill\": 0.2445196000001033,\r\n    \"tests/unit/command/test_queue.py::test_experiments_remove_flags\": 0.31704769999998916,\r\n    \"tests/unit/command/test_queue.py::test_experiments_remove_invalid\": 0.297946599999932,\r\n    \"tests/unit/command/test_queue.py::test_experiments_remove_name\": 0.26617610000005243,\r\n    \"tests/unit/command/test_queue.py::test_experiments_start\": 1.2269019999998818,\r\n    \"tests/unit/command/test_queue.py::test_experiments_status\": 1.2477585000001454,\r\n    \"tests/unit/command/test_queue.py::test_experiments_stop\": 0.27853619999996226,\r\n    \"tests/unit/command/test_queue.py::test_queue_logs\": 
0.2533302999999023,\r\n    \"tests/unit/command/test_queue.py::test_worker_status[worker_status0-Worker status: 0 active, 2 idle]\": 0.2085013999999319,\r\n    \"tests/unit/command/test_queue.py::test_worker_status[worker_status1-Worker status: 2 active, 1 idle]\": 0.21324529999992592,\r\n    \"tests/unit/command/test_queue.py::test_worker_status[worker_status2-Worker status: 1 active, 0 idle]\": 0.26774479999983214,\r\n    \"tests/unit/command/test_repro.py::test_default_arguments\": 0.24381379999999808,\r\n    \"tests/unit/command/test_repro.py::test_downstream\": 0.2228683999999248,\r\n    \"tests/unit/command/test_stage.py::test_stage_add[command0-echo foo bar]\": 0.232102399999917,\r\n    \"tests/unit/command/test_stage.py::test_stage_add[command1-echo \\\"foo bar\\\"]\": 0.2880078999999114,\r\n    \"tests/unit/command/test_stage.py::test_stage_add[command2-echo \\\"foo bar\\\"]\": 0.17658459999995557,\r\n    \"tests/unit/command/test_stage.py::test_stage_add[command3-cmd --flag \\\"\\\"]\": 0.19729919999997492,\r\n    \"tests/unit/command/test_stage.py::test_stage_add_and_run\": 0.17568000000005668,\r\n    \"tests/unit/command/test_status.py::test_cloud_status\": 0.16782099999988986,\r\n    \"tests/unit/command/test_status.py::test_status_check_updates\": 0.19725930000004155,\r\n    \"tests/unit/command/test_status.py::test_status_empty\": 0.1967822999999953,\r\n    \"tests/unit/command/test_status.py::test_status_quiet[status0-0]\": 0.19551589999991847,\r\n    \"tests/unit/command/test_status.py::test_status_quiet[status1-1]\": 0.17035260000000108,\r\n    \"tests/unit/command/test_status.py::test_status_quiet[status2-1]\": 0.19946329999993395,\r\n    \"tests/unit/command/test_status.py::test_status_show_json[status0]\": 0.1761330999999018,\r\n    \"tests/unit/command/test_status.py::test_status_show_json[status1]\": 0.17701260000001184,\r\n    \"tests/unit/command/test_status.py::test_status_show_json[status2]\": 0.18058289999999033,\r\n    
\"tests/unit/command/test_status.py::test_status_up_to_date[cloud_opts0-Cache and remote 'default' are in sync]\": 0.17520509999997103,\r\n    \"tests/unit/command/test_status.py::test_status_up_to_date[cloud_opts1-Cache and remote 'remote1' are in sync]\": 0.16241600000000744,\r\n    \"tests/unit/command/test_status.py::test_status_up_to_date[cloud_opts2-Data and pipelines are up to date]\": 0.174744399999895,\r\n    \"tests/unit/command/test_studio.py::test_studio_login_arguments\": 0.020730800000137606,\r\n    \"tests/unit/command/test_studio.py::test_studio_login_success\": 0.1833499000001666,\r\n    \"tests/unit/command/test_studio.py::test_studio_login_token_check_failed\": 0.016864599999962593,\r\n    \"tests/unit/command/test_studio.py::test_studio_logout\": 0.16298749999998563,\r\n    \"tests/unit/command/test_studio.py::test_studio_token\": 0.15698469999995268,\r\n    \"tests/unit/command/test_update.py::test_update\": 0.17654290000007222,\r\n    \"tests/unit/command/test_update.py::test_update_to_remote\": 0.15949060000002646,\r\n    \"tests/unit/data/db/test_local.py::test_is_protected[hardlink]\": 0.14557190000004994,\r\n    \"tests/unit/data/db/test_local.py::test_is_protected[symlink]\": 0.1829543000000058,\r\n    \"tests/unit/data/db/test_local.py::test_protect_ignore_errors[13]\": 0.18708559999993213,\r\n    \"tests/unit/data/db/test_local.py::test_protect_ignore_errors[1]\": 0.16553050000004532,\r\n    \"tests/unit/data/db/test_local.py::test_protect_ignore_errors[30]\": 0.1776026999999658,\r\n    \"tests/unit/data/db/test_local.py::test_set_exec_ignore_errors[13]\": 0.22822449999989658,\r\n    \"tests/unit/data/db/test_local.py::test_set_exec_ignore_errors[1]\": 0.20305240000027425,\r\n    \"tests/unit/data/db/test_local.py::test_set_exec_ignore_errors[30]\": 0.1831402000000253,\r\n    \"tests/unit/data/db/test_local.py::test_staging_dir\": 0.230100199999697,\r\n    \"tests/unit/data/db/test_local.py::test_staging_file\": 0.20912219999991066,\r\n 
   \"tests/unit/data/db/test_local.py::test_status_download_optimization\": 0.14161040000010416,\r\n    \"tests/unit/dependency/test_dependency.py::test_save_missing\": 0.19652719999999135,\r\n    \"tests/unit/dependency/test_params.py::test_dumpd_with_info\": 0.15128699999991113,\r\n    \"tests/unit/dependency/test_params.py::test_dumpd_without_info\": 0.16029950000029203,\r\n    \"tests/unit/dependency/test_params.py::test_get_hash_missing_config\": 0.13132500000006075,\r\n    \"tests/unit/dependency/test_params.py::test_get_hash_missing_param\": 0.1511273999997229,\r\n    \"tests/unit/dependency/test_params.py::test_loadd_from\": 0.14914219999991474,\r\n    \"tests/unit/dependency/test_params.py::test_loads_params\": 0.1819633000000067,\r\n    \"tests/unit/dependency/test_params.py::test_loads_params_without_any_specific_targets\": 0.2397292999999081,\r\n    \"tests/unit/dependency/test_params.py::test_params_error[params0-Only list of str/dict is supported. Got: 'int']\": 0.17743289999998524,\r\n    \"tests/unit/dependency/test_params.py::test_params_error[params1-Expected list of params for custom params file 'b_file', got 'str'.]\": 0.14181680000001506,\r\n    \"tests/unit/dependency/test_params.py::test_params_py_tuple_status\": 0.1323810000001231,\r\n    \"tests/unit/dependency/test_params.py::test_params_status_without_targets\": 0.14940939999996772,\r\n    \"tests/unit/dependency/test_params.py::test_params_with_false_values[[]]\": 0.14156870000010713,\r\n    \"tests/unit/dependency/test_params.py::test_params_with_false_values[]\": 0.14010119999989,\r\n    \"tests/unit/dependency/test_params.py::test_params_with_false_values[false]\": 0.145325400000047,\r\n    \"tests/unit/dependency/test_params.py::test_params_with_false_values[null]\": 0.13275300000009338,\r\n    \"tests/unit/dependency/test_params.py::test_params_with_false_values[{}]\": 0.13771410000003925,\r\n    \"tests/unit/dependency/test_params.py::test_read_params_default_loader\": 
0.1354366000000482,\r\n    \"tests/unit/dependency/test_params.py::test_read_params_nested\": 0.14428989999987607,\r\n    \"tests/unit/dependency/test_params.py::test_read_params_nonexistent_file\": 0.14137549999986732,\r\n    \"tests/unit/dependency/test_params.py::test_read_params_py\": 0.15157290000001922,\r\n    \"tests/unit/dependency/test_params.py::test_read_params_toml\": 0.1404378000001998,\r\n    \"tests/unit/dependency/test_params.py::test_read_params_unsupported_format\": 0.1372903999999835,\r\n    \"tests/unit/dependency/test_params.py::test_read_params_wrong_suffix\": 0.16509849999988546,\r\n    \"tests/unit/fs/test_base.py::test_missing_deps\": 0.00495150000006106,\r\n    \"tests/unit/fs/test_data.py::test_exists\": 0.35812570000030064,\r\n    \"tests/unit/fs/test_data.py::test_get_hash_dir\": 0.3086691000000883,\r\n    \"tests/unit/fs/test_data.py::test_get_hash_dirty_dir\": 0.43394499999999425,\r\n    \"tests/unit/fs/test_data.py::test_get_hash_dirty_file\": 0.38452159999997093,\r\n    \"tests/unit/fs/test_data.py::test_get_hash_file\": 0.2722163000003093,\r\n    \"tests/unit/fs/test_data.py::test_get_hash_granular\": 0.3548885000000155,\r\n    \"tests/unit/fs/test_data.py::test_get_key[-key0]\": 0.2359702999997353,\r\n    \"tests/unit/fs/test_data.py::test_get_key[.-key1]\": 0.20508520000021235,\r\n    \"tests/unit/fs/test_data.py::test_get_key[/-key2]\": 0.1873142999997981,\r\n    \"tests/unit/fs/test_data.py::test_get_key[dir/foo-key4]\": 0.261411299999736,\r\n    \"tests/unit/fs/test_data.py::test_get_key[foo-key3]\": 0.3334287999998651,\r\n    \"tests/unit/fs/test_data.py::test_isdir_isfile\": 0.36860460000002604,\r\n    \"tests/unit/fs/test_data.py::test_isdir_mixed\": 0.2752560999997513,\r\n    \"tests/unit/fs/test_data.py::test_open\": 0.38896999999997206,\r\n    \"tests/unit/fs/test_data.py::test_open_dirty_hash\": 0.3455529000000297,\r\n    \"tests/unit/fs/test_data.py::test_open_dirty_no_hash\": 0.22985790000007,\r\n    
\"tests/unit/fs/test_data.py::test_open_in_history\": 0.5580856999999924,\r\n    \"tests/unit/fs/test_data.py::test_open_no_remote\": 0.29373580000014954,\r\n    \"tests/unit/fs/test_data.py::test_walk\": 0.45851840000000266,\r\n    \"tests/unit/fs/test_data.py::test_walk_dir\": 0.3647290999999768,\r\n    \"tests/unit/fs/test_data.py::test_walk_missing\": 0.21059100000024955,\r\n    \"tests/unit/fs/test_data.py::test_walk_not_a_dir\": 0.28855810000004567,\r\n    \"tests/unit/fs/test_dvc.py::test_dvcfs_no_subrepos\": 1.1262057000001278,\r\n    \"tests/unit/fs/test_dvc.py::test_exists\": 0.4299327999999605,\r\n    \"tests/unit/fs/test_dvc.py::test_exists_isdir_isfile_dirty\": 0.3719997999999123,\r\n    \"tests/unit/fs/test_dvc.py::test_fsid\": 0.3600894999999582,\r\n    \"tests/unit/fs/test_dvc.py::test_fsid_noscm\": 0.17050810000000638,\r\n    \"tests/unit/fs/test_dvc.py::test_fsid_url\": 0.6390982999998869,\r\n    \"tests/unit/fs/test_dvc.py::test_get_hash_cached_dir\": 0.3788449999997283,\r\n    \"tests/unit/fs/test_dvc.py::test_get_hash_cached_file\": 0.2868271000002096,\r\n    \"tests/unit/fs/test_dvc.py::test_get_hash_cached_granular\": 0.32177900000010595,\r\n    \"tests/unit/fs/test_dvc.py::test_get_hash_dirty_dir\": 0.3100024000002577,\r\n    \"tests/unit/fs/test_dvc.py::test_get_hash_dirty_file\": 0.27687379999997574,\r\n    \"tests/unit/fs/test_dvc.py::test_get_hash_mixed_dir\": 0.44705369999996947,\r\n    \"tests/unit/fs/test_dvc.py::test_isdir_isfile\": 0.48258069999997133,\r\n    \"tests/unit/fs/test_dvc.py::test_isdir_mixed\": 0.28985499999998865,\r\n    \"tests/unit/fs/test_dvc.py::test_isdvc\": 0.3862890000000334,\r\n    \"tests/unit/fs/test_dvc.py::test_ls_dir_empty\": 0.2741804000002048,\r\n    \"tests/unit/fs/test_dvc.py::test_ls_dirty\": 0.2980661000001419,\r\n    \"tests/unit/fs/test_dvc.py::test_ls_file_not_found\": 0.2732452000000194,\r\n    \"tests/unit/fs/test_dvc.py::test_open\": 0.39393840000002456,\r\n    
\"tests/unit/fs/test_dvc.py::test_open_dirty_hash\": 0.24628350000011778,\r\n    \"tests/unit/fs/test_dvc.py::test_open_dirty_no_hash\": 0.1439026000002741,\r\n    \"tests/unit/fs/test_dvc.py::test_open_in_history\": 0.5555142999999134,\r\n    \"tests/unit/fs/test_dvc.py::test_subrepo_walk[False-extra_expected0]\": 1.4545625999999174,\r\n    \"tests/unit/fs/test_dvc.py::test_subrepo_walk[True-extra_expected1]\": 1.7074651000002632,\r\n    \"tests/unit/fs/test_dvc.py::test_subrepos\": 1.6347952000000987,\r\n    \"tests/unit/fs/test_dvc.py::test_walk[False-extra_expected0]\": 0.3712035000000924,\r\n    \"tests/unit/fs/test_dvc.py::test_walk[True-extra_expected1]\": 0.5252416000000721,\r\n    \"tests/unit/fs/test_dvc.py::test_walk_dirty\": 0.43012769999995726,\r\n    \"tests/unit/fs/test_dvc.py::test_walk_dirty_cached_dir\": 0.5374512999999297,\r\n    \"tests/unit/fs/test_dvc.py::test_walk_missing\": 0.20010729999989962,\r\n    \"tests/unit/fs/test_dvc.py::test_walk_mixed_dir\": 0.5005822000000535,\r\n    \"tests/unit/fs/test_dvc.py::test_walk_nested_subrepos[False]\": 2.6429789000001165,\r\n    \"tests/unit/fs/test_dvc.py::test_walk_nested_subrepos[True]\": 2.859540100000004,\r\n    \"tests/unit/fs/test_dvc.py::test_walk_not_a_dir\": 0.2780836999997973,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_dvc_only_dirs[data/processed]\": 0.6769035000002077,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_dvc_only_dirs[data/raw]\": 0.7841137000002618,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_dvc_only_dirs[data]\": 0.6757109000000128,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_dvc_tracked_file[data/processed/processed-1.csv]\": 0.7904966999999488,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_dvc_tracked_file[data/processed/processed-2.csv]\": 0.7899893000001157,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_dvc_tracked_file[data/raw/raw-1.csv]\": 0.720624799999996,\r\n    
\"tests/unit/fs/test_dvc_info.py::test_info_dvc_tracked_file[data/raw/raw-2.csv]\": 0.6412256000003254,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_dvc_tracked_file[models/transform.pickle]\": 0.6508121999997911,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_dvc_mixed_dirs[.]\": 0.634685300000001,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_dvc_mixed_dirs[models]\": 0.6856822000002012,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_only_dirs[src/utils]\": 0.8510240000000522,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_only_dirs[src]\": 0.7053726999999981,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_tracked_file[README.md]\": 0.8011664999999084,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_tracked_file[models/test.py]\": 0.7170016000000032,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_tracked_file[models/train.py]\": 0.6278869000000213,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_tracked_file[src/utils/__init__.py]\": 0.7002584999997907,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_git_tracked_file[src/utils/serve_model.py]\": 0.737733500000104,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_not_existing\": 0.7489877000000433,\r\n    \"tests/unit/fs/test_dvc_info.py::test_info_on_subrepos\": 1.311515800000052,\r\n    \"tests/unit/fs/test_fs.py::test_get_cloud_fs\": 0.003662800000029165,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[../file-LocalFileSystem]\": 0.003933200000119541,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[..\\\\\\\\file-LocalFileSystem]\": 0.0037267999998675805,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[./file-LocalFileSystem]\": 0.0037047000000711705,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[.\\\\\\\\file-LocalFileSystem]\": 0.0036365000000841974,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[file-LocalFileSystem]\": 0.0036200999998072803,\r\n    
\"tests/unit/fs/test_fs.py::test_get_fs_cls[hdfs://example.com/dir/path-HDFSFileSystem]\": 0.0038507000001573033,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[http://example.com/path/to/file-HTTPFileSystem]\": 0.0036757999998826563,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[https://example.com/path/to/file-HTTPSFileSystem]\": 0.0037765999998100597,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[path/to/file-LocalFileSystem]\": 0.0036463999999796215,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[path\\\\\\\\to\\\\\\\\file-LocalFileSystem]\": 0.0037243999997826904,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[s3://bucket/path-S3FileSystem]\": 0.004122800000004645,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[ssh://example.com:/dir/path-SSHFileSystem]\": 0.003962999999885142,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_cls[unknown://path-LocalFileSystem]\": 0.0038380000000870496,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_config\": 0.003567299999986062,\r\n    \"tests/unit/fs/test_fs.py::test_get_fs_config_error\": 0.0034471999997549574,\r\n    \"tests/unit/fs/test_fs.py::test_remote_url\": 0.0040197999999236345,\r\n    \"tests/unit/fs/test_tree.py::test_get_cloud_fs\": 0.1809981000001244,\r\n    \"tests/unit/fs/test_tree.py::test_get_cloud_fs_validate\": 0.1421276000000944,\r\n    \"tests/unit/output/test_annotations.py::test_annotation_to_dict[kwargs0]\": 0.0043542000000798,\r\n    \"tests/unit/output/test_annotations.py::test_annotation_to_dict[kwargs1]\": 0.0034908000002360495,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline[metrics]\": 0.14089249999983622,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline[outs]\": 0.1623079000000871,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline[plots]\": 0.1616214000000582,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline_accumulates_flag\": 0.13705239999990226,\r\n    
\"tests/unit/output/test_load.py::test_load_from_pipeline_error_on_typ[None]\": 0.14832630000023528,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline_error_on_typ[]\": 0.1467334999997547,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline_error_on_typ[illegal]\": 0.1416404000001421,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline_illegal_type[3]\": 0.14419900000007146,\r\n    \"tests/unit/output/test_load.py::test_load_from_pipeline_illegal_type[key1]\": 0.1510142000001906,\r\n    \"tests/unit/output/test_load.py::test_load_remote\": 0.1515172999997958,\r\n    \"tests/unit/output/test_load.py::test_load_remote_files_from_pipeline\": 0.3239920999997139,\r\n    \"tests/unit/output/test_load.py::test_plots_load_from_pipeline\": 0.1858439999998609,\r\n    \"tests/unit/output/test_local.py::test_return_0_on_no_cache\": 0.18396800000004987,\r\n    \"tests/unit/output/test_local.py::test_return_1_on_single_file_cache\": 0.18803580000030706,\r\n    \"tests/unit/output/test_local.py::test_return_multiple_for_dir\": 0.17993090000004486,\r\n    \"tests/unit/output/test_local.py::test_str_on_external_absolute_path\": 0.17777769999997872,\r\n    \"tests/unit/output/test_local.py::test_str_on_local_absolute_path\": 0.21161090000009608,\r\n    \"tests/unit/output/test_local.py::test_str_workdir_inside_repo\": 0.14259830000014517,\r\n    \"tests/unit/output/test_local.py::test_str_workdir_outside_repo\": 0.19554309999989528,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[-None]\": 0.005544099999497121,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[000002000000000000000000c16859d1d071c6b1ffc9c8557d4909f1-000002000000000000000000c16859d1d071c6b1ffc9c8557d4909f1]\": 0.003528299999743467,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[11111-11111_0]\": 0.0035367000000405824,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[11111-11111_1]\": 0.003755199999886827,\r\n    
\"tests/unit/output/test_output.py::test_checksum_schema[13393-13393]\": 0.0037766000000374333,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[3cc286c534a71504476da009ed174423-3cc286c534a71504476da009ed174423]\": 0.003689800000302057,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[676-676]\": 0.0036434999997254636,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[None-None]\": 0.003606799999715804,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[aAaBa-aaaba]\": 0.0035385999999562046,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema[d41d8cd98f00b204e9800998ecf8427e-38-d41d8cd98f00b204e9800998ecf8427e-38]\": 0.0037460000000919536,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema_fail[11]\": 0.0037425999998959014,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema_fail[1]\": 0.003756699999939883,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema_fail[value2]\": 0.00356540000007044,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema_fail[value3]\": 0.0036418999998204526,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema_fail[value4]\": 0.0035010999999940395,\r\n    \"tests/unit/output/test_output.py::test_checksum_schema_fail[value5]\": 0.0036368999999467633,\r\n    \"tests/unit/output/test_output.py::test_dumpd_cloud_versioning_dir\": 0.013638500000070053,\r\n    \"tests/unit/output/test_output.py::test_get_used_objs[False-Output 'path'(stage: 'stage.dvc') is missing version info. Cache for it will not be collected. Use `dvc repro` to get your pipeline up to date.]\": 0.013380899999901885,\r\n    \"tests/unit/output/test_output.py::test_get_used_objs[True-Output 'path'(stage: 'stage.dvc') is missing version info. Cache for it will not be collected. 
Use `dvc repro` to get your pipeline up to date.\\\\nYou can also use `dvc commit stage.dvc` to associate existing 'path' with stage: 'stage.dvc'.]\": 0.013191500000175438,\r\n    \"tests/unit/output/test_output.py::test_hash_info_cloud_versioning_dir\": 0.011704800000188698,\r\n    \"tests/unit/output/test_output.py::test_remote_missing_dependency_on_dir_pull\": 0.4997587999998814,\r\n    \"tests/unit/output/test_output.py::test_save_missing\": 0.17189609999991262,\r\n    \"tests/unit/output/test_output.py::test_version_aware_is_set_based_on_files\": 0.013280800000302406,\r\n    \"tests/unit/remote/test_oss.py::test_init\": 0.13651709999999184,\r\n    \"tests/unit/remote/test_remote.py::test_makedirs_not_create_for_top_level_path[GSFileSystem]\": 0.14460880000001453,\r\n    \"tests/unit/remote/test_remote.py::test_makedirs_not_create_for_top_level_path[S3FileSystem]\": 0.13261130000000776,\r\n    \"tests/unit/remote/test_remote.py::test_remote_with_hash_jobs\": 0.15536950000000616,\r\n    \"tests/unit/remote/test_remote.py::test_remote_with_jobs\": 0.16428399999995236,\r\n    \"tests/unit/remote/test_remote.py::test_remote_without_hash_jobs\": 0.16145559999995385,\r\n    \"tests/unit/remote/test_remote.py::test_remote_without_hash_jobs_default\": 0.1481607000000622,\r\n    \"tests/unit/remote/test_webdav.py::test_ask_password\": 0.00495260000025155,\r\n    \"tests/unit/remote/test_webdav.py::test_ask_password_custom_auth_header\": 0.00502760000017588,\r\n    \"tests/unit/remote/test_webdav.py::test_common\": 0.004034600000068167,\r\n    \"tests/unit/remote/test_webdav.py::test_custom_auth_header\": 0.0038762000001497654,\r\n    \"tests/unit/remote/test_webdav.py::test_password\": 0.003827599999794984,\r\n    \"tests/unit/remote/test_webdav.py::test_remote_with_jobs[webdav://username@example.com/public.php/webdav-WebDAVFileSystem]\": 0.15608989999986989,\r\n    
\"tests/unit/remote/test_webdav.py::test_remote_with_jobs[webdavs://username@example.com/public.php/webdav-WebDAVSFileSystem]\": 0.14663779999978033,\r\n    \"tests/unit/remote/test_webdav.py::test_ssl_verify_custom_cert\": 0.00405799999998635,\r\n    \"tests/unit/remote/test_webdav.py::test_token\": 0.003971099999944272,\r\n    \"tests/unit/remote/test_webdav.py::test_user\": 0.003973999999971056,\r\n    \"tests/unit/remote/test_webhdfs.py::test_init\": 0.14084939999997914,\r\n    \"tests/unit/remote/test_webhdfs.py::test_verify_ssl\": 0.15593999999987318,\r\n    \"tests/unit/render/test_convert.py::test_to_json_image\": 0.0036106000000017957,\r\n    \"tests/unit/render/test_convert.py::test_to_json_vega\": 0.005109799999900133,\r\n    \"tests/unit/render/test_convert.py::test_to_json_vega_split\": 0.004524400000036621,\r\n    \"tests/unit/render/test_image_converter.py::test_image_converter_no_out\": 0.003433300000097006,\r\n    \"tests/unit/render/test_image_converter.py::test_image_converter_with_out\": 0.015260500000067623,\r\n    \"tests/unit/render/test_image_converter.py::test_image_converter_with_slash_in_revision\": 0.018313700000135213,\r\n    \"tests/unit/render/test_match.py::test_flat_datapoints_errors_are_caught\": 0.004753199999640856,\r\n    \"tests/unit/render/test_match.py::test_group_definitions[all]\": 0.003569299999753639,\r\n    \"tests/unit/render/test_match.py::test_group_definitions[multi_config]\": 0.003538899999966816,\r\n    \"tests/unit/render/test_match.py::test_group_definitions[multi_rev]\": 0.0034962000001996785,\r\n    \"tests/unit/render/test_match.py::test_group_definitions[multi_rev_multi_config]\": 0.0036884000001009554,\r\n    \"tests/unit/render/test_match.py::test_group_definitions[simple]\": 0.0035629999997581763,\r\n    \"tests/unit/render/test_match.py::test_match_renderers\": 0.00402299999996103,\r\n    \"tests/unit/render/test_match.py::test_squash_plots_properties_config_files\": 0.003502900000057707,\r\n    
\"tests/unit/render/test_match.py::test_squash_plots_properties_revs\": 0.003376799999841751,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[choose_x_y]\": 0.003819599999815182,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[default_x_y]\": 0.003854499999761174,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[find_in_nested_structure]\": 0.003715999999712949,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[multi_file_json]\": 0.0038222999999106833,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[multi_file_y_same_prefix]\": 0.003748200000018187,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[multi_source_json]\": 0.0037368000000697066,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[multi_source_y_single_x]\": 0.003875499999821841,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[multiple_x_fields]\": 0.004527599999846643,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[y_def_list]\": 0.003832700000202749,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[y_list]\": 0.0037419000000227243,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert[y_list_x_dict]\": 0.003806600000416438,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert_fail[unequal_datapoints]\": 0.003742300000112664,\r\n    \"tests/unit/render/test_vega_converter.py::test_convert_fail[unequal_x_y]\": 0.0038218999998207437,\r\n    \"tests/unit/render/test_vega_converter.py::test_finding_lists[dictionary0-expected_result0]\": 0.0035947999999734748,\r\n    \"tests/unit/render/test_vega_converter.py::test_finding_lists[dictionary1-expected_result1]\": 0.003548999999793523,\r\n    \"tests/unit/render/test_vega_converter.py::test_finding_lists[dictionary2-expected_result2]\": 0.0034938000001147884,\r\n    \"tests/unit/render/test_vega_converter.py::test_infer_x_label[properties0-actual]\": 0.0036790000001474255,\r\n    
\"tests/unit/render/test_vega_converter.py::test_infer_x_label[properties1-actual]\": 0.0035590000002230227,\r\n    \"tests/unit/render/test_vega_converter.py::test_infer_x_label[properties2-x]\": 0.003653600000006918,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_celery_queue_kill[False]\": 3.2636135999998714,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_celery_queue_kill[True]\": 3.247582599999987,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_celery_queue_kill_invalid[False]\": 3.2468249000000924,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_celery_queue_kill_invalid[True]\": 3.2016183999999157,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_post_run_after_kill\": 5.27350279999996,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_queue_iter_done_task[FAILURE]\": 3.3001954999999725,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_queue_iter_done_task[SUCCESS]\": 3.2327950000001238,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_queue_status\": 3.2498809999999594,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_shutdown\": 3.241012300000193,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_shutdown_no_tasks\": 3.236263000000008,\r\n    \"tests/unit/repo/experiments/queue/test_celery.py::test_shutdown_with_kill\": 3.2628928999997697,\r\n    \"tests/unit/repo/experiments/queue/test_remove.py::test_remove_done\": 3.258274300000039,\r\n    \"tests/unit/repo/experiments/queue/test_remove.py::test_remove_queued\": 3.249447200000077,\r\n    \"tests/unit/repo/experiments/test_collect.py::test_collect_stable_sorting\": 18.420879500000183,\r\n    \"tests/unit/repo/experiments/test_executor_status.py::test_celery_queue_failure_status\": 3.197412799999938,\r\n    \"tests/unit/repo/experiments/test_executor_status.py::test_celery_queue_success_status\": 5.281382499999836,\r\n    
\"tests/unit/repo/experiments/test_executor_status.py::test_executor_status_compatibility\": 0.003536400000029971,\r\n    \"tests/unit/repo/experiments/test_executor_status.py::test_workspace_executor_failed_status[tempdir_queue]\": 1.1872725000000628,\r\n    \"tests/unit/repo/experiments/test_executor_status.py::test_workspace_executor_failed_status[workspace_queue]\": 0.8289185000000998,\r\n    \"tests/unit/repo/experiments/test_executor_status.py::test_workspace_executor_success_status[tempdir_queue]\": 1.6061510000001817,\r\n    \"tests/unit/repo/experiments/test_executor_status.py::test_workspace_executor_success_status[workspace_queue]\": 1.1191812999998092,\r\n    \"tests/unit/repo/experiments/test_remove.py::test_remove_done_tasks\": 3.204113199999938,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_gen_random_name\": 0.0036457000001064443,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_resolve_exp_ref[False-False]\": 0.5896682000000055,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_resolve_exp_ref[False-True]\": 0.5865679999999429,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_resolve_exp_ref[True-False]\": 0.4884703999998692,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_resolve_exp_ref[True-True]\": 0.4549055000002227,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[*-False]\": 0.058686999999736145,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[:-False]\": 0.03801729999986492,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[?-False]\": 0.031393499999921914,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[@-result4]\": 0.043861699999979464,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[^-False]\": 0.03508149999970556,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[group/name-False]\": 0.04892050000012205,\r\n    
\"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[invalid/.name-False]\": 0.03710669999986749,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[na me-False]\": 0.03663880000021891,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[name-True]\": 0.03781759999992573,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_run_check_ref_format[~-False]\": 0.03606680000007145,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_to_studio_params[params0-expected0]\": 0.003655899999785106,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_to_studio_params[params1-expected1]\": 0.003651599999784594,\r\n    \"tests/unit/repo/experiments/test_utils.py::test_to_studio_params[params2-expected2]\": 0.0035170999999536434,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions[arg_revisions0-False-expected_revisions0]\": 0.004743899999994028,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions[arg_revisions1-True-expected_revisions1]\": 0.004207899999983056,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions[arg_revisions2-False-expected_revisions2]\": 0.0041298000001006585,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions[arg_revisions3-True-expected_revisions3]\": 0.004053900000144495,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions_experiment[arg_revisions0-v0-expected_revisions0]\": 0.004405799999858573,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions_experiment[arg_revisions1-None-expected_revisions1]\": 0.004275099999858867,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions_experiment[arg_revisions2-v0-expected_revisions2]\": 0.004378200000246579,\r\n    \"tests/unit/repo/plots/test_diff.py::test_revisions_experiment[arg_revisions3-None-expected_revisions3]\": 0.004301799999893774,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes0-False-expected_edges0]\": 0.0039300999999341,\r\n    
\"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes1-False-expected_edges1]\": 0.004124099999899045,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes10-True-expected_edges10]\": 0.004557500000373693,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes11-True-expected_edges11]\": 0.004562999999734529,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes12-True-expected_edges12]\": 0.004001199999947858,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes13-True-expected_edges13]\": 0.003916599999683967,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes2-False-expected_edges2]\": 0.003934500000013941,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes3-False-expected_edges3]\": 0.004986700000245037,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes4-False-expected_edges4]\": 0.004723999999896478,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes5-False-expected_edges5]\": 0.004219399999783491,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes6-False-expected_edges6]\": 0.003915600000027553,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes7-True-expected_edges7]\": 0.004030999999940832,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes8-True-expected_edges8]\": 0.0038394000000607775,\r\n    \"tests/unit/repo/test_graph.py::test_subgraph_of_nodes[nodes9-True-expected_edges9]\": 0.004678999999896405,\r\n    \"tests/unit/repo/test_open_repo.py::test_hook_is_called\": 4.525591000000077,\r\n    \"tests/unit/repo/test_open_repo.py::test_subrepo_is_constructed_properly[False]\": 0.9076153000000886,\r\n    \"tests/unit/repo/test_open_repo.py::test_subrepo_is_constructed_properly[True]\": 1.2559671999999864,\r\n    \"tests/unit/repo/test_repo.py::test_branch_config\": 0.638178200000084,\r\n    \"tests/unit/repo/test_repo.py::test_dynamic_cache_initialization\": 
0.5202628000001823,\r\n    \"tests/unit/repo/test_repo.py::test_find_outs_by_path[dir\\\\\\\\subdir\\\\\\\\file]\": 0.22287480000022697,\r\n    \"tests/unit/repo/test_repo.py::test_find_outs_by_path[dir\\\\\\\\subdir]\": 0.23052469999993264,\r\n    \"tests/unit/repo/test_repo.py::test_find_outs_by_path[dir]\": 0.2600753999997778,\r\n    \"tests/unit/repo/test_repo.py::test_find_outs_by_path_does_graph_checks\": 0.22581250000007458,\r\n    \"tests/unit/repo/test_repo.py::test_is_dvc_internal\": 0.12822430000005625,\r\n    \"tests/unit/repo/test_repo.py::test_locked\": 0.0072508999999172374,\r\n    \"tests/unit/repo/test_repo.py::test_skip_graph_checks\": 0.8591042000000471,\r\n    \"tests/unit/repo/test_repo.py::test_used_objs[dir\\\\\\\\subdir\\\\\\\\file]\": 0.23300220000010086,\r\n    \"tests/unit/repo/test_repo.py::test_used_objs[dir\\\\\\\\subdir]\": 0.23801000000003114,\r\n    \"tests/unit/repo/test_reproduce.py::test_active_graph\": 0.003997900000058507,\r\n    \"tests/unit/repo/test_reproduce.py::test_number_reproduces\": 0.4025128999999197,\r\n    \"tests/unit/repo/test_reproduce.py::test_repro_plan\": 0.004954800000177784,\r\n    \"tests/unit/repo/test_scm_context.py::test_ignore[no_scm]\": 0.004981300000054034,\r\n    \"tests/unit/repo/test_scm_context.py::test_ignore[scm]\": 0.005258500000081767,\r\n    \"tests/unit/repo/test_scm_context.py::test_ignore_remove[no_scm]\": 0.005109800000127507,\r\n    \"tests/unit/repo/test_scm_context.py::test_ignore_remove[scm]\": 0.0052145999998174375,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_autostage_changed_files[no_scm]\": 0.0064303000001473265,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_autostage_changed_files[scm]\": 0.005826300000080664,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_clears_ignores_on_error[no_scm]\": 0.005321699999967677,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_clears_ignores_on_error[scm]\": 
0.0055414000000837405,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_decorator[no_scm]\": 0.005799300000035146,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_decorator[scm]\": 0.006207899999935762,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[no_scm-False-False]\": 0.004690799999934825,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[no_scm-False-True]\": 0.004631500000186861,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[no_scm-True-False]\": 0.0046300999999857595,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[no_scm-True-True]\": 0.004604500000141343,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[scm-False-False]\": 0.004875800000036179,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[scm-False-True]\": 0.004849700000249868,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[scm-True-False]\": 0.0048351000000366184,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_on_no_files_to_track[scm-True-True]\": 0.004896400000006906,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_remind_disable[no_scm]\": 0.004462099999727798,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_remind_disable[scm]\": 0.004766199999721721,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_remind_to_track[no_scm-False]\": 0.004654299999856448,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_remind_to_track[no_scm-True]\": 0.004695799999808514,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_remind_to_track[scm-False]\": 0.006858499999907508,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_remind_to_track[scm-True]\": 0.004867899999680958,\r\n    
\"tests/unit/repo/test_scm_context.py::test_scm_context_reset_on_exit[no_scm]\": 0.005056499999909647,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_context_reset_on_exit[scm]\": 0.005627999999887834,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_track_changed_files[no_scm]\": 0.005414800000380637,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_track_changed_files[scm]\": 0.0056264999998347776,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_track_file[no_scm]\": 0.005126499999960288,\r\n    \"tests/unit/repo/test_scm_context.py::test_scm_track_file[scm]\": 0.0051109000000906235,\r\n    \"tests/unit/scm/test_scm.py::test_iter_revs\": 1.9997329999998783,\r\n    \"tests/unit/stage/test_cache.py::test_shared_stage_cache\": 0.32890530000008766,\r\n    \"tests/unit/stage/test_cache.py::test_stage_cache\": 0.4478663999998389,\r\n    \"tests/unit/stage/test_cache.py::test_stage_cache_params\": 0.4387864000002537,\r\n    \"tests/unit/stage/test_cache.py::test_stage_cache_wdir\": 0.43086459999994986,\r\n    \"tests/unit/stage/test_cache.py::test_unhashable[kwargs0]\": 0.13933509999992566,\r\n    \"tests/unit/stage/test_cache.py::test_unhashable[kwargs1]\": 0.15753200000017387,\r\n    \"tests/unit/stage/test_cache.py::test_unhashable[kwargs2]\": 0.1639799000001858,\r\n    \"tests/unit/stage/test_cache.py::test_unhashable[kwargs3]\": 0.13701670000000377,\r\n    \"tests/unit/stage/test_cache.py::test_unhashable[kwargs4]\": 0.18883580000010625,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_deps_outs\": 0.1840856999999687,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_dos2unix\": 0.16062810000016725,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_empty_data\": 0.13230139999996027,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_missing_checksums\": 0.13909800000010364,\r\n    
\"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_missing_params_section\": 0.1985528999998678,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_outs_isexec\": 0.17040280000014718,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_params\": 0.19260850000000573,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_use_appropriate_checksum\": 0.31557760000009694,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_fill_from_lock_with_missing_sections\": 0.16610119999995732,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_changed_command\": 0.12378579999995054,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage\": 0.1506573000001481,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_cmd_with_list\": 0.13581310000017766,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_mapping\": 0.16256889999999657,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_no_lock\": 0.15486989999999423,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_outs_with_flags\": 0.15815199999974539,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_wdir_and_path_correctly\": 0.1253830000000562,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_with_metrics_and_plots[metrics]\": 0.14104959999986022,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_with_metrics_and_plots[plots]\": 0.1346588999999767,\r\n    \"tests/unit/stage/test_loader_pipeline_file.py::test_load_stage_with_params\": 0.13948459999983243,\r\n    \"tests/unit/stage/test_run.py::test_run_stage_dry[cmd1-expected1]\": 0.13949279999997088,\r\n    \"tests/unit/stage/test_run.py::test_run_stage_dry[mycmd arg1 arg2-expected0]\": 0.13452050000000781,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_always_changed\": 
0.13004399999999805,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_cmd\": 0.14637320000019827,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_deps_sorted\": 0.13880129999984092,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_frozen\": 0.15286340000011478,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_order\": 0.159932900000058,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_order_deps_outs[None]\": 0.18510999999989508,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_order_deps_outs[deps]\": 0.22124350000012782,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_order_deps_outs[metrics]\": 0.1904506000003039,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_order_deps_outs[outs]\": 0.1972204999999576,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_order_deps_outs[params]\": 0.17652300000008836,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_order_deps_outs[plots]\": 0.21205989999975827,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_outs_and_outs_flags_are_sorted[metrics-extra1]\": 0.14520010000001093,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_outs_and_outs_flags_are_sorted[outs-extra2]\": 0.1622485999998844,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_outs_and_outs_flags_are_sorted[plots-extra0]\": 0.16353909999997995,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_outs_sorted\": 0.14247219999992922,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_params_file_sorted\": 0.1458096999999725,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_params_file_without_targets\": 0.16316560000018399,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_params_sorted\": 0.14448880000009012,\r\n    \"tests/unit/stage/test_serialize_pipeline_file.py::test_plot_props\": 0.14348180000001776,\r\n    
\"tests/unit/stage/test_serialize_pipeline_file.py::test_wdir\": 0.13083659999983865,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_dump_nondefault_hash\": 0.295127599999887,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock\": 0.18278179999970234,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_deps\": 0.1810011000002305,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_deps_order\": 0.19974980000006326,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs[metrics]\": 0.15444040000011228,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs[outs]\": 0.13414199999965604,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs[plots]\": 0.14159519999998338,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs_isexec[metrics]\": 0.13434350000011364,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs_isexec[outs]\": 0.14570949999983895,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs_isexec[plots]\": 0.13978499999984706,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs_order[metrics]\": 0.1530977999998413,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs_order[outs]\": 0.15072440000017195,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_outs_order[plots]\": 0.14244639999969877,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_params\": 0.19224449999978788,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_params_file_sorted\": 0.16011409999987336,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_params_no_values_filled\": 0.14611429999990833,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_params_without_targets[None-expected0]\": 0.17541849999997794,\r\n    
\"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_params_without_targets[info1-expected1]\": 0.13082460000009632,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_lock_params_without_targets[info2-expected2]\": 0.13176380000027166,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_order\": 0.1418648999999732,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_to_lockfile\": 0.16397269999993114,\r\n    \"tests/unit/stage/test_serialize_pipeline_lock.py::test_to_single_stage_lockfile_cloud_versioning_dir\": 0.1574033000001691,\r\n    \"tests/unit/stage/test_stage.py::test_always_changed\": 0.1821055000002616,\r\n    \"tests/unit/stage/test_stage.py::test_external_outs\": 0.16749239999990095,\r\n    \"tests/unit/stage/test_stage.py::test_meta_ignored\": 0.004416700000092533,\r\n    \"tests/unit/stage/test_stage.py::test_path_conversion\": 0.13386209999998755,\r\n    \"tests/unit/stage/test_stage.py::test_stage_checksum\": 0.004756299999826297,\r\n    \"tests/unit/stage/test_stage.py::test_stage_run_ignore_sigint\": 0.15540309999983037,\r\n    \"tests/unit/stage/test_stage.py::test_stage_update\": 0.16005749999999352,\r\n    \"tests/unit/stage/test_stage.py::test_wdir_default_ignored\": 0.004468200000019351,\r\n    \"tests/unit/stage/test_stage.py::test_wdir_non_default_is_not_ignored\": 0.004455800000187082,\r\n    \"tests/unit/stage/test_utils.py::test_get_stage_files\": 0.43559189999996306,\r\n    \"tests/unit/stage/test_utils.py::test_get_stage_files_wdir\": 0.36742880000019795,\r\n    \"tests/unit/stage/test_utils.py::test_resolve_paths\": 0.003733799999963594,\r\n    \"tests/unit/test_analytics.py::test_collect_and_send_report\": 0.033919500000138214,\r\n    \"tests/unit/test_analytics.py::test_is_enabled[config0-True]\": 0.17716590000009091,\r\n    \"tests/unit/test_analytics.py::test_is_enabled[config1-False]\": 0.1375153000001319,\r\n    \"tests/unit/test_analytics.py::test_is_enabled[config2-True]\": 
0.13800270000024284,\r\n    \"tests/unit/test_analytics.py::test_is_enabled[config3-True]\": 0.12800289999995584,\r\n    \"tests/unit/test_analytics.py::test_is_enabled[config4-False]\": 0.11623309999981757,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[None-None-True]\": 0.1333523000000696,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[None-false-False]\": 0.12022549999983312,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[None-true-False]\": 0.12543059999984507,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[false-None-False]\": 0.1427845000000616,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[false-false-False]\": 0.13364479999995638,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[false-true-False]\": 0.13754279999989194,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[true-None-True]\": 0.13801170000010643,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[true-false-False]\": 0.1709238999999343,\r\n    \"tests/unit/test_analytics.py::test_is_enabled_env_neg[true-true-False]\": 0.12370190000001458,\r\n    \"tests/unit/test_analytics.py::test_runtime_info\": 0.12920549999989817,\r\n    \"tests/unit/test_analytics.py::test_send\": 0.10222620000013194,\r\n    \"tests/unit/test_analytics.py::test_system_info\": 0.004068200000119759,\r\n    \"tests/unit/test_api.py::test_open_raises_error_if_no_context\": 0.1751347999997961,\r\n    \"tests/unit/test_api.py::test_open_rev_raises_error_on_wrong_mode\": 0.18360870000014984,\r\n    \"tests/unit/test_collect.py::test_collect_duplicates\": 0.26829430000020693,\r\n    \"tests/unit/test_compare.py::test_diff_default\": 0.004596500000161541,\r\n    \"tests/unit/test_compare.py::test_diff_falsey_values\": 0.003511599999910686,\r\n    \"tests/unit/test_compare.py::test_diff_list[composite0-[2, 3]]\": 0.0037557999999080494,\r\n    \"tests/unit/test_compare.py::test_diff_list[composite1-{'foo': 3, 'bar': 
3}]\": 0.0036715999999614723,\r\n    \"tests/unit/test_compare.py::test_diff_mocked[False]\": 0.00486189999992348,\r\n    \"tests/unit/test_compare.py::test_diff_mocked[True]\": 0.005840999999918495,\r\n    \"tests/unit/test_compare.py::test_diff_new\": 0.003453799999988405,\r\n    \"tests/unit/test_compare.py::test_diff_old_deleted\": 0.0035772999999608146,\r\n    \"tests/unit/test_compare.py::test_diff_sorted\": 0.003653499999700216,\r\n    \"tests/unit/test_compare.py::test_diff_table[Metric]\": 0.004011100000298029,\r\n    \"tests/unit/test_compare.py::test_diff_table[Param]\": 0.0034849999999551073,\r\n    \"tests/unit/test_compare.py::test_diff_table_precision\": 0.0033272000000579283,\r\n    \"tests/unit/test_compare.py::test_diff_table_rounding\": 0.003433800000266274,\r\n    \"tests/unit/test_compare.py::test_diff_table_with_value_column\": 0.0034550999998828047,\r\n    \"tests/unit/test_compare.py::test_diff_unsupported_diff_message[extra0-no diff]\": 0.0036456999998790707,\r\n    \"tests/unit/test_compare.py::test_diff_unsupported_diff_message[extra1--]\": 0.0036821000001054927,\r\n    \"tests/unit/test_compare.py::test_do_not_show_changes\": 0.0034590999998727057,\r\n    \"tests/unit/test_compare.py::test_metrics_diff_md\": 0.004767900000160807,\r\n    \"tests/unit/test_compare.py::test_metrics_show_default\": 0.004823699999633391,\r\n    \"tests/unit/test_compare.py::test_metrics_show_markdown\": 0.006294600000273931,\r\n    \"tests/unit/test_compare.py::test_metrics_show_mocked[False]\": 0.0048413999998047075,\r\n    \"tests/unit/test_compare.py::test_metrics_show_mocked[True]\": 0.0048140999999759515,\r\n    \"tests/unit/test_compare.py::test_metrics_show_precision\": 0.003947599999719387,\r\n    \"tests/unit/test_compare.py::test_metrics_show_with_different_metrics_header\": 0.0037142000003314024,\r\n    \"tests/unit/test_compare.py::test_metrics_show_with_multiple_revision\": 0.0037491000000500208,\r\n    
\"tests/unit/test_compare.py::test_metrics_show_with_no_revision\": 0.003680699999904391,\r\n    \"tests/unit/test_compare.py::test_metrics_show_with_non_dict_values\": 0.003496899999845482,\r\n    \"tests/unit/test_compare.py::test_metrics_show_with_one_revision_multiple_paths\": 0.003716999999824111,\r\n    \"tests/unit/test_compare.py::test_metrics_show_with_valid_falsey_values\": 0.0035393000000567554,\r\n    \"tests/unit/test_compare.py::test_no_path\": 0.0033435000000281434,\r\n    \"tests/unit/test_config.py::test_feature_section_supports_arbitrary_values\": 0.005893000000014581,\r\n    \"tests/unit/test_config.py::test_get_fs\": 0.08485560000031,\r\n    \"tests/unit/test_config.py::test_load_configob_error\": 0.151444900000115,\r\n    \"tests/unit/test_config.py::test_load_unicode_error\": 0.20792520000009063,\r\n    \"tests/unit/test_config.py::test_resolve[../cache-D:\\\\\\\\a\\\\\\\\dvc\\\\\\\\dvc\\\\\\\\cache]\": 0.0039029999998092535,\r\n    \"tests/unit/test_config.py::test_resolve[D:\\\\\\\\a\\\\\\\\dvc\\\\\\\\dvc-D:\\\\\\\\a\\\\\\\\dvc\\\\\\\\dvc]\": 0.0036580000000867585,\r\n    \"tests/unit/test_config.py::test_resolve[cache-D:\\\\\\\\a\\\\\\\\dvc\\\\\\\\dvc\\\\\\\\conf_dir\\\\\\\\cache]\": 0.003570000000308937,\r\n    \"tests/unit/test_config.py::test_resolve[dir/cache-D:\\\\\\\\a\\\\\\\\dvc\\\\\\\\dvc\\\\\\\\conf_dir\\\\\\\\dir\\\\\\\\cache]\": 0.0036769000000731467,\r\n    \"tests/unit/test_config.py::test_resolve[ssh://some/path-ssh://some/path]\": 0.0035961999999472027,\r\n    \"tests/unit/test_config.py::test_resolve_homedir\": 0.0036099000001286186,\r\n    \"tests/unit/test_config.py::test_s3_ssl_verify\": 0.14650280000000748,\r\n    \"tests/unit/test_config.py::test_to_relpath[..\\\\\\\\cache-../../cache]\": 0.0038163999997777864,\r\n    \"tests/unit/test_config.py::test_to_relpath[D:\\\\\\\\a\\\\\\\\dvc\\\\\\\\dvc-D:\\\\\\\\a\\\\\\\\dvc\\\\\\\\dvc]\": 0.003559499999937543,\r\n    
\"tests/unit/test_config.py::test_to_relpath[cache-../cache]\": 0.0038970000000517757,\r\n    \"tests/unit/test_config.py::test_to_relpath[ssh://some/path-ssh://some/path]\": 0.003644000000122105,\r\n    \"tests/unit/test_context.py::test_clone\": 0.005925200000092445,\r\n    \"tests/unit/test_context.py::test_context\": 0.0037342000000535336,\r\n    \"tests/unit/test_context.py::test_context_dict_ignores_keys_except_str\": 0.003621500000008382,\r\n    \"tests/unit/test_context.py::test_context_list\": 0.0036347000002479035,\r\n    \"tests/unit/test_context.py::test_context_setitem_getitem\": 0.004147799999827839,\r\n    \"tests/unit/test_context.py::test_load_from\": 0.01075610000020788,\r\n    \"tests/unit/test_context.py::test_load_from_raises_if_file_is_directory\": 0.12890109999989363,\r\n    \"tests/unit/test_context.py::test_load_from_raises_if_file_not_exist\": 0.13567880000050536,\r\n    \"tests/unit/test_context.py::test_loop_context\": 0.0036622000000079424,\r\n    \"tests/unit/test_context.py::test_merge_dict\": 0.003959100000201943,\r\n    \"tests/unit/test_context.py::test_merge_list\": 0.003910200000063924,\r\n    \"tests/unit/test_context.py::test_node_value\": 0.004252100000030623,\r\n    \"tests/unit/test_context.py::test_overwrite_with_setitem\": 0.003552399999989575,\r\n    \"tests/unit/test_context.py::test_repr\": 0.003907500000195796,\r\n    \"tests/unit/test_context.py::test_resolve_resolves_boolean_value\": 0.005912699999726101,\r\n    \"tests/unit/test_context.py::test_resolve_resolves_dict_keys\": 0.00771279999980834,\r\n    \"tests/unit/test_context.py::test_select\": 0.004463799999939511,\r\n    \"tests/unit/test_context.py::test_select_unwrap\": 0.004021400000056019,\r\n    \"tests/unit/test_context.py::test_track\": 0.021102499999869906,\r\n    \"tests/unit/test_context.py::test_track_from_multiple_files\": 0.024009399999840753,\r\n    \"tests/unit/test_daemon.py::test_daemon\": 0.0155182999999397,\r\n    
\"tests/unit/test_daemon.py::test_no_recursive_spawn\": 0.010640700000067227,\r\n    \"tests/unit/test_dirs.py::test_global_config_dir_respects_env_var\": 0.0050538000000415195,\r\n    \"tests/unit/test_dvcfile.py::test_dump_stage\": 0.1760260000000926,\r\n    \"tests/unit/test_dvcfile.py::test_dvcfile_encoding_error\": 0.11111770000002252,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_file[../models/pipelines.yml]\": 0.00363799999990988,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_file[custom-pipelines.yaml]\": 0.003655600000229242,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_file[custom-pipelines.yml]\": 0.003573400000050242,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_file[pipelines.yaml]\": 0.004252900000210502,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_file[pipelines.yml]\": 0.004090399999995498,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_single_stage_file[../models/stage.dvc]\": 0.004619099999899845,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_single_stage_file[Dvcfile]\": 0.0036266999998133542,\r\n    \"tests/unit/test_dvcfile.py::test_pipelines_single_stage_file[stage.dvc]\": 0.00351099999966209,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_file_exists_but_dvcignored[dvc.yaml]\": 0.17553799999996045,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_file_exists_but_dvcignored[stage.dvc]\": 0.16315370000006624,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_on_invalid_data[dvc.yaml]\": 0.1786199000000579,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_on_invalid_data[stage.dvc]\": 0.17214519999970435,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_on_non_file[dvc.yaml]\": 0.1911176999997224,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_on_non_file[stage.dvc]\": 0.16595649999976558,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_on_not_existing_file[False-dvc.yaml]\": 0.1650908999999956,\r\n    
\"tests/unit/test_dvcfile.py::test_stage_load_on_not_existing_file[False-stage.dvc]\": 0.17084939999972448,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_on_not_existing_file[True-dvc.yaml]\": 0.17394500000000335,\r\n    \"tests/unit/test_dvcfile.py::test_stage_load_on_not_existing_file[True-stage.dvc]\": 0.14360230000011143,\r\n    \"tests/unit/test_dvcfile.py::test_try_loading_dvcfile_that_is_gitignored[dvc.yaml]\": 0.16512570000008964,\r\n    \"tests/unit/test_dvcfile.py::test_try_loading_dvcfile_that_is_gitignored[foo.dvc]\": 0.14629939999986163,\r\n    \"tests/unit/test_hashinfo.py::test_as_raw\": 0.0038465999998607003,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[ to_ignore-patterns5-False]\": 0.006561899999951493,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[ to_ignore-patterns6-True]\": 0.007623000000194224,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[!to_ignore.txt-patterns10-True]\": 0.007035800000039671,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[#to_ignore-patterns3-True]\": 0.007036199999674864,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[#to_ignore-patterns4-False]\": 0.006590999999843916,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[.git\\\\\\\\file.txt-patterns25-True]\": 0.00703879999991841,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[2ile.txt-patterns32-False]\": 0.006501600000092367,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[\\\\\\\\full\\\\\\\\path\\\\\\\\to\\\\\\\\ignore\\\\\\\\file\\\\\\\\to_ignore-patterns15-True]\": 0.0076676999997289386,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\.dvc\\\\\\\\file.txt-patterns26-True]\": 0.00679479999985233,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\file-patterns12-False]\": 0.007175299999971685,\r\n    
\"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\file-patterns13-True]\": 0.007030900000017937,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\file.txt-patterns18-True]\": 0.007021999999778927,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\file.txt-patterns20-True]\": 0.0077042999998866435,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\file.txt-patterns46-True]\": 0.006923100000221893,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\p\\\\\\\\file.txt-patterns47-False]\": 0.00848039999982575,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\path-patterns24-False]\": 0.008022099999834609,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\subdir\\\\\\\\file.txt-patterns19-True]\": 0.00679809999974168,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\subdir\\\\\\\\file.txt-patterns21-True]\": 0.00670669999999518,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\subdir\\\\\\\\file.txt-patterns22-True]\": 0.006637000000182525,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[data\\\\\\\\subdir\\\\\\\\file.txt-patterns23-False]\": 0.006796199999826058,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[dont_ignore.txt-patterns1-False]\": 0.007163100000070699,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[fi/e.txt-patterns30-False]\": 0.00652780000018538,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[file-patterns11-True]\": 0.007933799999818802,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[file.txt-patterns28-True]\": 0.006892299999890383,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[file.txt-patterns29-True]\": 0.0077467999999498716,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[file.txt-patterns31-True]\": 
0.006814599999870552,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[other\\\\\\\\data\\\\\\\\file-patterns14-False]\": 0.006615400000328009,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[path\\\\\\\\to_ignore.txt-patterns17-False]\": 0.006943200000023353,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[path\\\\\\\\to_ignore.txt-patterns44-False]\": 0.0071652999999969325,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[path\\\\\\\\to_ignore.txt-patterns45-True]\": 0.00705799999991541,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\p\\\\\\\\p2\\\\\\\\to_ignore-patterns33-True]\": 0.007391700000198398,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\p\\\\\\\\p2\\\\\\\\to_ignore-patterns34-True]\": 0.006901199999902019,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\p\\\\\\\\p2\\\\\\\\to_ignore-patterns36-True]\": 0.00679960000002211,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\p\\\\\\\\p2\\\\\\\\to_ignore-patterns37-False]\": 0.006791200000179742,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\p\\\\\\\\p2\\\\\\\\to_ignore-patterns40-True]\": 0.006612099999983911,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\p\\\\\\\\to_ignore-patterns39-True]\": 0.006811799999923096,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\path\\\\\\\\path2\\\\\\\\dont_ignore-patterns35-False]\": 0.006664599999794518,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\path\\\\\\\\path2\\\\\\\\dont_ignore-patterns38-True]\": 0.0074474000000464,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\path\\\\\\\\path2\\\\\\\\dont_ignore-patterns41-False]\": 0.0064676999998027895,\r\n    
\"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\path\\\\\\\\path2\\\\\\\\dont_ignore-patterns42-False]\": 0.007750099999611848,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\path\\\\\\\\path2\\\\\\\\to_ignore-patterns27-False]\": 0.006933299999900555,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[rel\\\\\\\\path\\\\\\\\path2\\\\\\\\to_ignore-patterns48-False]\": 0.0072572999999920285,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[to_ignore-patterns0-True]\": 0.007687200000191297,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[to_ignore-patterns2-True]\": 0.00741150000021662,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[to_ignore.txt-patterns16-True]\": 0.007188699999915116,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[to_ignore.txt-patterns43-True]\": 0.007081200000357057,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[to_ignore.txt-patterns7-True]\": 0.006782300000168107,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[to_ignore.txt-patterns8-False]\": 0.008190399999648434,\r\n    \"tests/unit/test_ignore.py::test_match_ignore_from_file[to_ignore.txt-patterns9-True]\": 0.007188999999698353,\r\n    \"tests/unit/test_ignore.py::test_should_ignore_dir[.dvc-]\": 0.003738300000122763,\r\n    \"tests/unit/test_ignore.py::test_should_ignore_dir[.dvc-dir]\": 0.0037137999997867155,\r\n    \"tests/unit/test_ignore.py::test_should_ignore_dir[.git-]\": 0.004464200000256824,\r\n    \"tests/unit/test_ignore.py::test_should_ignore_dir[.git-dir]\": 0.0037531999996645027,\r\n    \"tests/unit/test_ignore.py::test_should_ignore_dir[.hg-]\": 0.0037776000001485954,\r\n    \"tests/unit/test_ignore.py::test_should_ignore_dir[.hg-dir]\": 0.0037257000001318374,\r\n    \"tests/unit/test_imports.py::test_no_remote_imports\": 0.09562540000001718,\r\n    \"tests/unit/test_info.py::test_caches\": 0.2690287000002627,\r\n    
\"tests/unit/test_info.py::test_fs_info_in_repo\": 0.3141158000003088,\r\n    \"tests/unit/test_info.py::test_fs_info_outside_of_repo\": 0.0716359000000466,\r\n    \"tests/unit/test_info.py::test_info_in_broken_git_repo\": 0.2816538000001856,\r\n    \"tests/unit/test_info.py::test_info_in_repo[False]\": 0.39633419999995567,\r\n    \"tests/unit/test_info.py::test_info_in_repo[True]\": 1.6630297999997765,\r\n    \"tests/unit/test_info.py::test_info_in_subdir\": 0.4202313000000686,\r\n    \"tests/unit/test_info.py::test_info_outside_of_repo\": 0.07112589999974261,\r\n    \"tests/unit/test_info.py::test_plugin_versions\": 0.24081580000006397,\r\n    \"tests/unit/test_info.py::test_remotes\": 0.32117399999992813,\r\n    \"tests/unit/test_info.py::test_remotes_empty\": 0.3108306999999968,\r\n    \"tests/unit/test_interpolate.py::test_escape[Great shot kid, that was \\\\\\\\${value} in a ${value}-Great shot kid, that was ${value} in a value]\": 0.003911600000037652,\r\n    \"tests/unit/test_interpolate.py::test_escape[May the \\\\\\\\${value} be with you-May the ${value} be with you]\": 0.0035144999997100967,\r\n    \"tests/unit/test_interpolate.py::test_escape[Month of \\\\\\\\${value}-Month of ${value}]\": 0.003654400000186797,\r\n    \"tests/unit/test_interpolate.py::test_escape[\\\\\\\\${ value } days-${ value } days]\": 0.0034967999997661536,\r\n    \"tests/unit/test_interpolate.py::test_escape[\\\\\\\\${ value }-${ value }]\": 0.003624200000103883,\r\n    \"tests/unit/test_interpolate.py::test_escape[\\\\\\\\${value}-${value}]\": 0.00351009999985763,\r\n    \"tests/unit/test_interpolate.py::test_resolve_collection\": 0.006243999999924199,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[-${ item }-item]\": 0.004020400000172231,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[-${value}-value]\": 0.003885999999965861,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[0_0-${ item }-item]\": 
0.003973599999881117,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[0_0-${value}-value]\": 0.004023799999686162,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[0_1-${ item }-item]\": 0.004107700000076875,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[0_1-${value}-value]\": 0.003916500000059386,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[12-${ item }-item]\": 0.003985800000009476,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[12-${value}-value]\": 0.004028799999787225,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[123-${ item }-item]\": 0.0040129999999862775,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[123-${value}-value]\": 0.004017700000076729,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[3.141592653589793-${ item }-item]\": 0.003979100000151448,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[3.141592653589793-${value}-value]\": 0.004051200000048993,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[30000.0-${ item }-item]\": 0.004006899999922098,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[30000.0-${value}-value]\": 0.004021100000045408,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[False-${ item }-item]\": 0.003932200000008379,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[False-${value}-value]\": 0.003958599999805301,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[Foobar-${ item }-item]\": 0.004025900000215188,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[Foobar-${value}-value]\": 0.004651299999977709,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[None-${ item }-item]\": 0.004000000000132786,\r\n    
\"tests/unit/test_interpolate.py::test_resolve_primitive_values[None-${value}-value]\": 0.003897699999924953,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[True-${ item }-item]\": 0.0040805000001000735,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[True-${value}-value]\": 0.004538399999773901,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[inf-${ item }-item]\": 0.003844300000309886,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitive_values[inf-${value}-value]\": 0.004028599999855942,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitives_dict_access\": 0.005663499999855048,\r\n    \"tests/unit/test_interpolate.py::test_resolve_primitives_list_access\": 0.00799919999985832,\r\n    \"tests/unit/test_interpolate.py::test_resolve_str\": 0.004223900000170033,\r\n    \"tests/unit/test_interpolate.py::test_resolve_unicode\": 0.005988900000147623,\r\n    \"tests/unit/test_lockfile.py::test_load_when_lockfile_does_not_exist\": 0.11878550000005816,\r\n    \"tests/unit/test_lockfile.py::test_load_when_lockfile_is_corrupted[corrupt_data0]\": 0.11560540000004949,\r\n    \"tests/unit/test_lockfile.py::test_load_when_lockfile_is_corrupted[corrupt_data1]\": 0.1336421999999402,\r\n    \"tests/unit/test_lockfile.py::test_load_when_lockfile_is_corrupted[corrupt_data2]\": 0.12180469999998422,\r\n    \"tests/unit/test_lockfile.py::test_load_when_lockfile_is_corrupted[corrupt_data3]\": 0.12833260000002156,\r\n    \"tests/unit/test_lockfile.py::test_stage_dump_no_outs_deps\": 0.14247329999989233,\r\n    \"tests/unit/test_lockfile.py::test_stage_dump_when_already_exists\": 0.15514020000023265,\r\n    \"tests/unit/test_lockfile.py::test_stage_dump_with_deps_and_outs\": 0.14595160000021679,\r\n    \"tests/unit/test_lockfile.py::test_stage_overwrites_if_already_exists\": 0.14148910000017167,\r\n    \"tests/unit/test_lockfile.py::test_try_loading_lockfile_that_is_gitignored[False-False]\": 
0.15414799999985007,\r\n    \"tests/unit/test_lockfile.py::test_try_loading_lockfile_that_is_gitignored[False-True]\": 0.15343560000019352,\r\n    \"tests/unit/test_lockfile.py::test_try_loading_lockfile_that_is_gitignored[True-False]\": 0.13935349999997015,\r\n    \"tests/unit/test_lockfile.py::test_try_loading_lockfile_that_is_gitignored[True-True]\": 0.14020170000003418,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_debug\": 0.005059300000084477,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_error\": 0.003906399999777932,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_exc_info_on_other_record_types\": 0.005675999999994019,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_exception\": 0.004515699999956269,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_exception_under_verbose\": 0.004960199999914039,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_exception_with_description_and_message\": 0.0041831000000911445,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_exception_with_description_and_without_message\": 0.004184900000154812,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_info\": 0.00401119999992261,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_nested_exceptions\": 0.005140400000072987,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_progress_awareness\": 0.005746700000145211,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_tb_only\": 0.0048854000001483655,\r\n    \"tests/unit/test_logger.py::TestColorFormatter::test_warning\": 0.004090800000085437,\r\n    \"tests/unit/test_logger.py::test_add_existing_level\": 0.004684700000098019,\r\n    \"tests/unit/test_logger.py::test_handlers\": 0.003505800000311865,\r\n    \"tests/unit/test_logger.py::test_info_with_debug_loglevel_shows_no_datetime\": 0.0048177999997278675,\r\n    \"tests/unit/test_logger.py::test_logging_debug_with_datetime\": 0.005303899999944406,\r\n  
  \"tests/unit/test_metrics.py::test_metrics_order\": 0.2618385999999191,\r\n    \"tests/unit/test_params.py::test_params_order\": 0.24566819999972722,\r\n    \"tests/unit/test_params.py::test_repro_unicode\": 0.22142780000012863,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[ space-/dir-dir/**/space]\": 0.01486069999987194,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[!include-/dir-!/dir/**/include]\": 0.013259799999786992,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[#comment-/dir-#comment]\": 0.016021900000168898,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[***/file.txt-/dir-dir/***/file.txt]\": 0.012832699999989927,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[***file-/dir-dir/**/***file]\": 0.013109799999710958,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[**/foo-/dir-dir/**/foo]\": 0.013117799999918134,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[**/foo/bar-/dir-dir/**/foo/bar]\": 0.01453029999993305,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[*aste*risk*-/dir-dir/**/*aste*risk*]\": 0.013277600000037637,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[/***.txt-/dir-dir/***.txt]\": 0.013214099999913742,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[/separator.txt-/dir-dir/separator.txt]\": 0.012841600000001563,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[?fi?le?-/dir-dir/**/?fi?le?]\": 0.013020799999821975,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[[a-zA-Z]file[a-zA-Z]-/dir-dir/**/[a-zA-Z]file[a-zA-Z]]\": 0.01323380000008001,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[\\\\\\\\ space-/dir-dir/**/ space]\": 0.013491599999724713,\r\n    
\"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[\\\\\\\\!important!.txt-/dir-dir/**/!important!.txt]\": 0.013096599999926184,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[\\\\\\\\#hash-/#dir-#dir/**/#hash]\": 0.016997200000105295,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[\\\\\\\\#hash-/dir-dir/**/#hash]\": 0.014032099999894854,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[a/***/b-/dir-dir/a/***/b]\": 0.014123800000106712,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[a/**/b-/dir-dir/a/**/b]\": 0.013111100000060105,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[abc/**-/dir-dir/abc/**]\": 0.013637900000276204,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[data/***-/dir-dir/data/***]\": 0.012775599999940823,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[doc/fortz/-/dir-dir/doc/fortz/]\": 0.016302199999927325,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[fortz/-/dir-dir/**/fortz/]\": 0.013589400000000751,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[no_sep-/dir-dir/**/no_sep]\": 0.01280029999998078,\r\n    \"tests/unit/test_pathspec_math.py::test_dvcignore_pattern_change_dir[subdir/separator.txt-/dir-dir/subdir/separator.txt]\": 0.012839100000064718,\r\n    \"tests/unit/test_progress.py::test_default\": 0.004811299999801122,\r\n    \"tests/unit/test_progress.py::test_quiet_logging\": 0.004419799999823226,\r\n    \"tests/unit/test_progress.py::test_quiet_logging_disable_false\": 0.004555699999855278,\r\n    \"tests/unit/test_progress.py::test_quiet_notty\": 0.004390899999862086,\r\n    \"tests/unit/test_prompt.py::test_confirm_in_tty_if_stdin_is_closed\": 0.0051822999998876185,\r\n    \"tests/unit/test_run.py::test_invalid_stage_names[copy$name]\": 
0.003373499999725027,\r\n    \"tests/unit/test_run.py::test_invalid_stage_names[copy-name?]\": 0.0034620000001268636,\r\n    \"tests/unit/test_run.py::test_invalid_stage_names[copy-name@v1]\": 0.0033856999998533865,\r\n    \"tests/unit/test_run.py::test_valid_stage_names[12]\": 0.003457799999978306,\r\n    \"tests/unit/test_run.py::test_valid_stage_names[copy-name]\": 0.003441499999780717,\r\n    \"tests/unit/test_run.py::test_valid_stage_names[copyName]\": 0.003389200000128767,\r\n    \"tests/unit/test_run.py::test_valid_stage_names[copy_name]\": 0.003440200000113691,\r\n    \"tests/unit/test_rwlock.py::test_broken_rwlock\": 0.014477100000021892,\r\n    \"tests/unit/test_rwlock.py::test_corrupted_rwlock[False]\": 0.01603619999991679,\r\n    \"tests/unit/test_rwlock.py::test_corrupted_rwlock[True]\": 0.013812900000175432,\r\n    \"tests/unit/test_rwlock.py::test_rwlock\": 0.020905200000242985,\r\n    \"tests/unit/test_rwlock.py::test_rwlock_edit_is_guarded\": 0.09206210000024839,\r\n    \"tests/unit/test_rwlock.py::test_rwlock_reentrant\": 0.022661599999764803,\r\n    \"tests/unit/test_rwlock.py::test_rwlock_subdirs\": 0.023793199999772696,\r\n    \"tests/unit/test_scm.py::test_resolve_rev_empty_git_repo\": 0.029855400000087684,\r\n    \"tests/unit/test_tabular_data.py::test_dict_like_interfaces\": 0.0033276999999998225,\r\n    \"tests/unit/test_tabular_data.py::test_drop\": 0.0033434999998007697,\r\n    \"tests/unit/test_tabular_data.py::test_drop_duplicates[cols-expected1-True]\": 0.0036497000000963453,\r\n    \"tests/unit/test_tabular_data.py::test_drop_duplicates[cols-expected2-False]\": 0.003721400000131325,\r\n    \"tests/unit/test_tabular_data.py::test_drop_duplicates[rows-expected0-True]\": 0.004074500000115222,\r\n    \"tests/unit/test_tabular_data.py::test_drop_duplicates_invalid_axis\": 0.003273099999887563,\r\n    \"tests/unit/test_tabular_data.py::test_drop_duplicates_rich_text\": 0.003615700000182187,\r\n    
\"tests/unit/test_tabular_data.py::test_drop_duplicates_subset[cols-subset2-expected2]\": 0.0037198000002263143,\r\n    \"tests/unit/test_tabular_data.py::test_drop_duplicates_subset[rows-subset0-expected0]\": 0.00396879999993871,\r\n    \"tests/unit/test_tabular_data.py::test_drop_duplicates_subset[rows-subset1-expected1]\": 0.00402999999982967,\r\n    \"tests/unit/test_tabular_data.py::test_dropna[cols-all-data3-expected3]\": 0.0043358999998872605,\r\n    \"tests/unit/test_tabular_data.py::test_dropna[cols-any-data2-expected2]\": 0.003774799999746392,\r\n    \"tests/unit/test_tabular_data.py::test_dropna[rows-all-data1-expected1]\": 0.0036873999997624196,\r\n    \"tests/unit/test_tabular_data.py::test_dropna[rows-any-data0-expected0]\": 0.0037959999999657157,\r\n    \"tests/unit/test_tabular_data.py::test_dropna_invalid_axis\": 0.00358570000025793,\r\n    \"tests/unit/test_tabular_data.py::test_dropna_subset[cols-expected0]\": 0.003985100000363673,\r\n    \"tests/unit/test_tabular_data.py::test_dropna_subset[rows-expected1]\": 0.003571299999975963,\r\n    \"tests/unit/test_tabular_data.py::test_fill_value\": 0.0044410999998945044,\r\n    \"tests/unit/test_tabular_data.py::test_list_operations\": 0.0036390999998729967,\r\n    \"tests/unit/test_tabular_data.py::test_protected\": 0.0034229999998842686,\r\n    \"tests/unit/test_tabular_data.py::test_row_from_dict\": 0.003386400000181311,\r\n    \"tests/unit/test_tabular_data.py::test_table_empty\": 0.00446330000022499,\r\n    \"tests/unit/test_updater.py::test_check\": 0.024595300000100906,\r\n    \"tests/unit/test_updater.py::test_check_fetches_on_invalid_data_format\": 0.021974799999952666,\r\n    \"tests/unit/test_updater.py::test_check_refetches_each_day\": 0.017732699999896795,\r\n    \"tests/unit/test_updater.py::test_check_update_respect_config[False]\": 0.017301200000247263,\r\n    \"tests/unit/test_updater.py::test_check_update_respect_config[True]\": 0.01794519999998556,\r\n    
\"tests/unit/test_updater.py::test_check_updates[ahead]\": 0.021424599999818383,\r\n    \"tests/unit/test_updater.py::test_check_updates[behind]\": 0.0198830999997881,\r\n    \"tests/unit/test_updater.py::test_check_updates[uptodate]\": 0.019249299999728464,\r\n    \"tests/unit/test_updater.py::test_fetch\": 0.019140100000186067,\r\n    \"tests/unit/test_updater.py::test_is_enabled[config0-True]\": 0.13130560000013247,\r\n    \"tests/unit/test_updater.py::test_is_enabled[config1-True]\": 0.14342600000009043,\r\n    \"tests/unit/test_updater.py::test_is_enabled[config2-False]\": 0.13593219999984285,\r\n    \"tests/unit/test_updater.py::test_notify_message[None-Find the latest release at https://github.com/treeverse/dvc/releases/latest.]\": 0.014024399999925663,\r\n    \"tests/unit/test_updater.py::test_notify_message[binary-To upgrade, uninstall dvc and reinstall from https://dvc.org.]\": 0.013217200000326557,\r\n    \"tests/unit/test_updater.py::test_notify_message[brew-To upgrade, run 'brew upgrade dvc'.]\": 0.013467300000229443,\r\n    \"tests/unit/test_updater.py::test_notify_message[choco-To upgrade, run 'choco upgrade dvc'.]\": 0.014645900000004985,\r\n    \"tests/unit/test_updater.py::test_notify_message[conda-To upgrade, run 'conda update dvc'.]\": 0.01598150000017995,\r\n    \"tests/unit/test_updater.py::test_notify_message[deb-To upgrade, run 'apt-get install --only-upgrade dvc'.]\": 0.014986099999987346,\r\n    \"tests/unit/test_updater.py::test_notify_message[exe-To upgrade, uninstall dvc and reinstall from https://dvc.org.]\": 0.01401350000014645,\r\n    \"tests/unit/test_updater.py::test_notify_message[osxpkg-To upgrade, uninstall dvc and reinstall from https://dvc.org.]\": 0.013980499999888707,\r\n    \"tests/unit/test_updater.py::test_notify_message[pip-To upgrade, run 'pip install --upgrade dvc'.]\": 0.015155299999833005,\r\n    \"tests/unit/test_updater.py::test_notify_message[rpm-To upgrade, run 'yum update dvc'.]\": 0.014036500000202068,\r\n    
\"tests/unit/test_updater.py::test_notify_message[unknown-Find the latest release at https://github.com/treeverse/dvc/releases/latest.]\": 0.01609110000003966,\r\n    \"tests/unit/ui/test_console.py::test_capsys_works\": 0.003642499999841675,\r\n    \"tests/unit/ui/test_console.py::test_write\": 0.004320600000255581,\r\n    \"tests/unit/ui/test_console.py::test_write_json[False-{\\\"hello\\\": \\\"world\\\", \\\"date\\\": \\\"1970-01-01 00:00:00\\\"}\\\\n]\": 0.004433300000073359,\r\n    \"tests/unit/ui/test_console.py::test_write_json[True-{\\\\n  \\\"hello\\\": \\\"world\\\",\\\\n  \\\"date\\\": \\\"1970-01-01 00:00:00\\\"\\\\n}\\\\n]\": 0.006361599999763712,\r\n    \"tests/unit/ui/test_pager.py::test_dvc_sets_default_options_on_less_without_less_env[DVC_PAGER]\": 0.004813199999944118,\r\n    \"tests/unit/ui/test_pager.py::test_dvc_sets_default_options_on_less_without_less_env[None]\": 0.004666300000053525,\r\n    \"tests/unit/ui/test_pager.py::test_dvc_sets_default_options_on_less_without_less_env[PAGER]\": 0.005321299999877738,\r\n    \"tests/unit/ui/test_pager.py::test_dvc_sets_some_options_on_less_if_less_env_defined[DVC_PAGER]\": 0.004765900000165857,\r\n    \"tests/unit/ui/test_pager.py::test_dvc_sets_some_options_on_less_if_less_env_defined[None]\": 0.004783499999803098,\r\n    \"tests/unit/ui/test_pager.py::test_dvc_sets_some_options_on_less_if_less_env_defined[PAGER]\": 0.005055099999935919,\r\n    \"tests/unit/ui/test_pager.py::test_find_pager_fails_to_find_any_pager\": 0.005119300000160365,\r\n    \"tests/unit/ui/test_pager.py::test_find_pager_uses_custom_pager_when_dvc_pager_env_var_is_defined\": 0.004193899999791029,\r\n    \"tests/unit/ui/test_pager.py::test_find_pager_uses_custom_pager_when_pager_env_is_defined\": 0.004189700000324592,\r\n    \"tests/unit/ui/test_pager.py::test_find_pager_uses_default_pager_when_found\": 0.004566800000020521,\r\n    \"tests/unit/ui/test_pager.py::test_find_pager_when_not_isatty\": 0.0042246000000432105,\r\n    
\"tests/unit/ui/test_pager.py::test_make_pager_when_no_pager_found\": 0.003733799999963594,\r\n    \"tests/unit/ui/test_pager.py::test_pager\": 0.005270399999744768,\r\n    \"tests/unit/ui/test_table.py::test_empty[False]\": 0.003731199999947421,\r\n    \"tests/unit/ui/test_table.py::test_empty[True]\": 0.004219100000000253,\r\n    \"tests/unit/ui/test_table.py::test_empty_markdown\": 0.003875900000139154,\r\n    \"tests/unit/ui/test_table.py::test_plain\": 0.004061899999896923,\r\n    \"tests/unit/ui/test_table.py::test_plain_headerless\": 0.004709599999841885,\r\n    \"tests/unit/ui/test_table.py::test_plain_md\": 0.003960700000334327,\r\n    \"tests/unit/ui/test_table.py::test_plain_pager\": 0.004913799999940238,\r\n    \"tests/unit/ui/test_table.py::test_rich_border\": 0.005841400000008434,\r\n    \"tests/unit/ui/test_table.py::test_rich_headerless\": 0.005394500000193148,\r\n    \"tests/unit/ui/test_table.py::test_rich_pager\": 0.0065830999999434425,\r\n    \"tests/unit/ui/test_table.py::test_rich_simple\": 0.005705299999817726,\r\n    \"tests/unit/ui/test_table.py::test_rich_styles[extra_opts0]\": 0.006005899999991016,\r\n    \"tests/unit/ui/test_table.py::test_rich_styles[extra_opts1]\": 0.005567899999959991,\r\n    \"tests/unit/ui/test_table.py::test_rich_styles[extra_opts2]\": 0.0056007999999110325,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_invalid_types[CONSTRUCTOR = dict(a=1, b=2)]\": 0.003520100000059756,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_invalid_types[SUM = 1 + 2]\": 0.003734900000381458,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[BOOL = True-result0]\": 0.004876599999988684,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[DICT = {'a': 1, 'b': 2}-result4]\": 0.0035950000001321314,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[FLOAT = 0.001-result2]\": 0.0036241000002519286,\r\n    
\"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[INT = 5-result1]\": 0.0037746000002698565,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[LIST = [1, 2, 3]-result5]\": 0.0043877999999040185,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[NONE = None-result8]\": 0.003579800000125033,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[SET = {1, 2, 3}-result6]\": 0.003589599999713755,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[STR = 'abc'-result3]\": 0.003801699999939956,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[TUPLE = (10, 100)-result7]\": 0.003691700000217679,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[UNARY_OP = -1-result9]\": 0.003852600000300299,\r\n    \"tests/unit/utils/serialize/test_python.py::test_parse_valid_types[class TrainConfig:\\\\n\\\\n            EPOCHS = 70\\\\n\\\\n            def __init__(self):\\\\n                self.layers = 5\\\\n                self.layers = 9  # TrainConfig.layers param will be 9\\\\n                bar = 3  # Will NOT be found since it's locally scoped\\\\n            -result10]\": 0.0037390999998478947,\r\n    \"tests/unit/utils/serialize/test_toml.py::test_parse_toml_for_update\": 0.004269399999657253,\r\n    \"tests/unit/utils/serialize/test_toml.py::test_parse_toml_type\": 0.004482099999904676,\r\n    \"tests/unit/utils/serialize/test_toml.py::test_preserve_comments\": 0.018339500000138287,\r\n    \"tests/unit/utils/serialize/test_yaml.py::test_parse_yaml_duplicate_key_error\": 0.004337799999802883,\r\n    \"tests/unit/utils/serialize/test_yaml.py::test_parse_yaml_invalid_unicode\": 0.017577299999857132,\r\n    \"tests/unit/utils/test_cli_parse.py::test_parse_params\": 0.0034523000001627224,\r\n    \"tests/unit/utils/test_cli_parse.py::test_to_path_overrides[params0-expected0]\": 0.003620999999839114,\r\n    
\"tests/unit/utils/test_cli_parse.py::test_to_path_overrides[params1-expected1]\": 0.0034564000004593254,\r\n    \"tests/unit/utils/test_cli_parse.py::test_to_path_overrides[params2-expected2]\": 0.0035394000001360837,\r\n    \"tests/unit/utils/test_cli_parse.py::test_to_path_overrides[params3-expected3]\": 0.003496000000041022,\r\n    \"tests/unit/utils/test_cli_parse.py::test_to_path_overrides[params4-expected4]\": 0.0035584000002018,\r\n    \"tests/unit/utils/test_collections.py::test_apply_diff_is_inplace\": 0.0033515000000079453,\r\n    \"tests/unit/utils/test_collections.py::test_apply_diff_mapping\": 0.003440900000214242,\r\n    \"tests/unit/utils/test_collections.py::test_apply_diff_seq\": 0.00329320000014377,\r\n    \"tests/unit/utils/test_collections.py::test_merge_dicts[changes0-expected0]\": 0.004868100000294362,\r\n    \"tests/unit/utils/test_collections.py::test_merge_dicts[changes1-expected1]\": 0.004610599999978149,\r\n    \"tests/unit/utils/test_collections.py::test_merge_dicts[changes2-expected2]\": 0.00518589999978758,\r\n    \"tests/unit/utils/test_collections.py::test_merge_dicts[changes3-expected3]\": 0.005508199999894714,\r\n    \"tests/unit/utils/test_collections.py::test_merge_dicts[changes4-expected4]\": 0.005414699999846562,\r\n    \"tests/unit/utils/test_collections.py::test_merge_dicts[changes5-expected5]\": 0.005224300000236326,\r\n    \"tests/unit/utils/test_collections.py::test_remove_missing_keys[changes0-expected0]\": 0.004527299999836032,\r\n    \"tests/unit/utils/test_collections.py::test_remove_missing_keys[changes1-expected1]\": 0.004726400000208741,\r\n    \"tests/unit/utils/test_collections.py::test_remove_missing_keys[changes2-expected2]\": 0.004119200000104684,\r\n    \"tests/unit/utils/test_collections.py::test_to_omegaconf\": 0.004031899999972666,\r\n    \"tests/unit/utils/test_executors.py::test_cancel_futures[False-False]\": 2.7464563000000908,\r\n    
\"tests/unit/utils/test_executors.py::test_cancel_futures[False-True]\": 2.7359928000000764,\r\n    \"tests/unit/utils/test_executors.py::test_cancel_futures[True-False]\": 0.11289850000025581,\r\n    \"tests/unit/utils/test_executors.py::test_cancel_futures[True-True]\": 0.12021770000023935,\r\n    \"tests/unit/utils/test_executors.py::test_cancel_on_error_context_manager\": 0.21685029999980543,\r\n    \"tests/unit/utils/test_fs.py::test_contains_symlink_case_sensitive_posix\": 0.0011397000000670232,\r\n    \"tests/unit/utils/test_fs.py::test_contains_symlink_case_sensitive_win\": 0.003489700000045559,\r\n    \"tests/unit/utils/test_fs.py::test_makedirs\": 0.0147185000000718,\r\n    \"tests/unit/utils/test_fs.py::test_path_isin_case_sensitive\": 0.0033009999997375417,\r\n    \"tests/unit/utils/test_fs.py::test_path_isin_on_common_substring_path\": 0.003304300000309013,\r\n    \"tests/unit/utils/test_fs.py::test_path_isin_on_same_path\": 0.00346199999989949,\r\n    \"tests/unit/utils/test_fs.py::test_path_isin_positive\": 0.003388500000028216,\r\n    \"tests/unit/utils/test_fs.py::test_path_isin_with_absolute_path\": 0.003385500000376851,\r\n    \"tests/unit/utils/test_fs.py::test_path_object_and_str_are_valid_arg_types\": 0.004334300000209623,\r\n    \"tests/unit/utils/test_fs.py::test_relpath_windows_different_drives\": 0.0035064999999576685,\r\n    \"tests/unit/utils/test_fs.py::test_remove\": 0.01732960000026651,\r\n    \"tests/unit/utils/test_fs.py::test_should_call_recursive_on_no_condition_matched\": 0.004912599999897793,\r\n    \"tests/unit/utils/test_fs.py::test_should_raise_exception_on_base_path_not_in_path\": 0.0035533000000214088,\r\n    \"tests/unit/utils/test_fs.py::test_should_return_false_on_no_more_dirs_below_path\": 0.004458399999975882,\r\n    \"tests/unit/utils/test_fs.py::test_should_return_false_on_path_eq_to_base_path\": 0.0038337000000865373,\r\n    \"tests/unit/utils/test_fs.py::test_should_return_false_when_base_path_is_symlink\": 
0.0040107999998326704,\r\n    \"tests/unit/utils/test_fs.py::test_should_return_true_on_symlink_in_path\": 0.00500249999981861,\r\n    \"tests/unit/utils/test_humanize.py::test_get_summary\": 0.003476800000271396,\r\n    \"tests/unit/utils/test_humanize.py::test_truncate_text\": 0.004332799999929193,\r\n    \"tests/unit/utils/test_humanize.py::test_truncate_text_smaller_than_max_length[False]\": 0.0036585999996532337,\r\n    \"tests/unit/utils/test_humanize.py::test_truncate_text_smaller_than_max_length[True]\": 0.0036007999999583262,\r\n    \"tests/unit/utils/test_plots.py::test_get_plot_id\": 0.0035087999999632302,\r\n    \"tests/unit/utils/test_plots.py::test_group_definitions_by_id\": 0.003671100000019578,\r\n    \"tests/unit/utils/test_studio.py::test_config_to_env\": 0.0034047000001464767,\r\n    \"tests/unit/utils/test_studio.py::test_env_to_config\": 0.0033028999998805375,\r\n    \"tests/unit/utils/test_studio.py::test_get_repo_url[None-None]\": 1.1035554000002321,\r\n    \"tests/unit/utils/test_studio.py::test_get_repo_url[http://url-http://url]\": 1.1347399999999652,\r\n    \"tests/unit/utils/test_studio.py::test_get_repo_url[origin-git@url]\": 1.0518679999997858,\r\n    \"tests/unit/utils/test_studio.py::test_notify_refs[200-side_effect0]\": 0.005626399999982823,\r\n    \"tests/unit/utils/test_studio.py::test_notify_refs[401-side_effect1]\": 0.00575210000010884,\r\n    \"tests/unit/utils/test_studio.py::test_notify_refs[500-ValueError]\": 0.005458200000020952,\r\n    \"tests/unit/utils/test_utils.py::test_dict_sha256[d0-f472eda60f09660a4750e8b3208cf90b3a3b24e5f42e0371d829710e9464d74a]\": 0.004516400000284193,\r\n    \"tests/unit/utils/test_utils.py::test_dict_sha256[d1-a239b67073bd58affcdb81fff3305d1726c6e7f9c86f3d4fca0e92e8147dc7b0]\": 81.51737459999981,\r\n    \"tests/unit/utils/test_utils.py::test_fix_env_pyenv[/orig/path1:/orig/path2-/orig/path1:/orig/path2]\": 0.0011337999999341264,\r\n    
\"tests/unit/utils/test_utils.py::test_fix_env_pyenv[/orig/path1:/orig/path2:/pyenv/bin:/pyenv/libexec-/orig/path1:/orig/path2:/pyenv/bin:/pyenv/libexec]\": 0.0011357999997017032,\r\n    \"tests/unit/utils/test_utils.py::test_fix_env_pyenv[/pyenv/bin:/pyenv/libexec:/orig/path1:/orig/path2-/orig/path1:/orig/path2]\": 0.0011476000001948705,\r\n    \"tests/unit/utils/test_utils.py::test_fix_env_pyenv[/pyenv/bin:/pyenv/libexec:/pyenv/plugins/plugin:/orig/path1:/orig/path2-/orig/path1:/orig/path2]\": 0.0013505999997960316,\r\n    \"tests/unit/utils/test_utils.py::test_fix_env_pyenv[/pyenv/bin:/some/libexec:/pyenv/plugins/plugin:/orig/path1:/orig/path2-/orig/path1:/orig/path2]\": 0.0011360999999396881,\r\n    \"tests/unit/utils/test_utils.py::test_hint_on_lockfile\": 0.004755899999963731,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[../models/stage.dvc-out4-def]\": 0.004768700000113313,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[../something.dvc:name-out7-None]\": 0.003745400000070731,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[:build2@{'level': [1, 2, 3]}-out16-None]\": 0.004095799999959127,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[:build@15-out11-None]\": 0.0036861999999473483,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[:build@{'level': 35}-out12-None]\": 0.0039848999999776424,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[:name-out2-None]\": 0.003986399999803325,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[:name-out5-default]\": 0.0037832000000435073,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[build2@{'level': [1, 2, 3]}-out15-None]\": 0.003791099999943981,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[build@15-out9-None]\": 0.003646100000196384,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[build@{'level': 35}-out10-None]\": 0.003970599999775004,\r\n    
\"tests/unit/utils/test_utils.py::test_parse_target[dvc.yaml-out0-None]\": 0.004136300000027404,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[dvc.yaml:build2@{'level': [1, 2, 3]}-out17-None]\": 0.004368599999907019,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[dvc.yaml:build@15-out13-None]\": 0.00391149999973095,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[dvc.yaml:build@{'level': 35}-out14-None]\": 0.0038666999998895335,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[dvc.yaml:name-out1-None]\": 0.0038415000001350563,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[file-out8-None]\": 0.003750200000240511,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[something.dvc:name-out6-None]\": 0.003912399999990157,\r\n    \"tests/unit/utils/test_utils.py::test_parse_target[stage.dvc-out3-None]\": 0.0037690999999995256,\r\n    \"tests/unit/utils/test_utils.py::test_relpath_windows\": 0.0037197999999989406,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[dir-None-False-dir]\": 0.004356400000006033,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[dir-other_dir-False-other_dir]\": 0.004498799999737457,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[dir-other_dir-True-other_dir\\\\\\\\dir]\": 0.004664899999852423,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[dir/-None-False-dir]\": 0.004348000000163665,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[target-None-False-target]\": 0.004511699999738994,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[target-dir-True-dir\\\\\\\\target]\": 0.00443119999999908,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[target-dir\\\\\\\\subdir-True-dir\\\\\\\\subdir\\\\\\\\target]\": 0.004393299999946976,\r\n    \"tests/unit/utils/test_utils.py::test_resolve_output[target-file_target-False-file_target]\": 0.0044926999996732775\r\n}\r\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "content": "---\nname: \"\\U0001F41B Bug Report\"\nabout: Create a bug report to help us improve DVC\n---\n\n# Bug Report\n\n<!--\n## Issue name\n\nIssue names must follow the pattern `command: description` where the command is the dvc command that you are trying to run. The description should describe the consequence of the bug.\n\nExample: `repro: doesn't detect input changes`\n-->\n\n## Description\n\n<!--\nA clear and concise description of what the bug is.\n-->\n\n### Reproduce\n\n<!--\nStep list of how to reproduce the bug\n-->\n\n<!--\nExample:\n\n1. dvc init\n2. Copy dataset.zip to the directory\n3. dvc add dataset.zip\n4. dvc run -d dataset.zip -o model ./train.sh\n5. modify dataset.zip\n6. dvc repro\n-->\n\n### Expected\n\n<!--\nA clear and concise description of what you expect to happen.\n-->\n\n### Environment information\n\n<!--\nThis is required to ensure that we can reproduce the bug.\n-->\n\n**Output of `dvc doctor`:**\n\n```console\n$ dvc doctor\n```\n\n**Additional Information (if any):**\n\n<!--\nPlease check https://github.com/treeverse/dvc/wiki/Debugging-DVC on ways to gather more information regarding the issue.\n\nIf applicable, please also provide a `--verbose` output of the command, eg: `dvc add --verbose`.\nIf the issue is regarding the performance, please attach the profiling information and the benchmark comparisons.\n-->\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "content": "blank_issues_enabled: false\ncontact_links:\n  - name: \"🤗 Need help?\"\n    url: https://dvc.org/chat\n    about: If you have a question, ask us on Discord. Please join with this invite 👉\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.md",
    "content": "---\nname: \"\\U0001F680 Feature Request\"\nabout: Suggest an idea for this project\n---\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "content": "* [ ] ❗ I have followed the [Contributing to DVC](https://dvc.org/doc/user-guide/contributing/core) checklist.\n\n* [ ] 📖 If this PR requires [documentation](https://dvc.org/doc) updates, I have created a separate PR (or issue, at least) in [dvc.org](https://github.com/treeverse/dvc.org) and linked it here.\n\nThank you for the contribution - we'll try to review it as soon as possible. 🙏\n"
  },
  {
    "path": ".github/codecov.yml",
    "content": "codecov:\n  notify:\n    wait_for_ci: true\ncoverage:\n  status:\n    project:\n      default:\n        target: auto\n        threshold: 2%\n    patch: off\ngithub_checks:\n  annotations: false\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "version: 2\n\nupdates:\n  - directory: \"/\"\n    package-ecosystem: \"pip\"\n    schedule:\n      interval: \"daily\"\n    labels:\n      - \"maintenance\"\n\n  - directory: \"/\"\n    package-ecosystem: \"github-actions\"\n    schedule:\n      interval: \"daily\"\n    labels:\n      - \"maintenance\"\n"
  },
  {
    "path": ".github/release.yml",
    "content": "changelog:\n  exclude:\n    labels: [\"skip-changelog\"]\n  categories:\n    - title: 🛠 Breaking Changes\n      labels: [\"breaking-change\"]\n    - title: 🚀 New Features and Enhancements\n      labels: [\"feature\", \"enhancement\", \"ui\"]\n    - title: ⚡ Optimizations\n      labels: [\"optimize\", \"performance\"]\n    - title: 🐛 Bug Fixes\n      labels: [\"bugfix\", \"bug\"]\n    - title: 🔨 Maintenance\n      labels: [\"maintenance\", \"refactoring\", \"chore\", \"build\", \"ci\"]\n    - title: Other Changes\n      labels: [\"*\"]\n"
  },
  {
    "path": ".github/workflows/benchmarks.yaml",
    "content": "name: benchmarks\non: [pull_request, workflow_dispatch]\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}\n  cancel-in-progress: true\n\nenv:\n  FORCE_COLOR: \"1\"\n  PY_COLORS: \"1\"\n\njobs:\n  bench:\n    name: run benchmarks\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/setup-python@v6\n        with:\n            python-version: \"3.14\"\n\n      - uses: actions/checkout@v6\n        with:\n          ref: ${{ github.event.pull_request.base.sha }}\n          fetch-depth: 0\n\n      - uses: astral-sh/setup-uv@v7\n        with:\n          enable-cache: true\n          cache-dependency-glob: pyproject.toml\n\n      - run: uv pip install '.[tests]' --system\n      - name: run benchmarks on base branch\n        run: pytest --benchmark-autosave dvc/testing/benchmarks/ -k 'test_init or test_help'\n\n      - uses: actions/checkout@v6\n        with:\n          fetch-depth: 0\n          clean: false\n      - run: uv pip install '.[tests]' --system\n      - name: run benchmarks for PR\n        run: >\n          pytest --benchmark-compare --benchmark-compare-fail=min:5%\n          --benchmark-group-by name\n          dvc/testing/benchmarks/ -k 'test_init or test_help'\n"
  },
  {
    "path": ".github/workflows/build.yaml",
    "content": "name: Build and upload package\non:\n  push:\n    branches:\n    - main\n  release:\n    types:\n    - published\n  workflow_dispatch:\n\nenv:\n  FORCE_COLOR: 1\n\npermissions:\n  contents: read\n\njobs:\n  build:\n    name: Build & Verify package\n    runs-on: ubuntu-latest\n    steps:\n    - uses: actions/checkout@v6\n      with:\n        fetch-depth: 0\n\n    - uses: actions/setup-python@v6\n      with:\n        python-version: \"3.14\"\n\n    - uses: astral-sh/setup-uv@v7\n\n    - name: Omit local version for Test PyPI upload\n      if: github.event_name == 'push' && github.ref == 'refs/heads/main'\n      run: echo SETUPTOOLS_SCM_OVERRIDES_FOR_DVC='{local_scheme=\"no-local-version\"}' >> $GITHUB_ENV\n\n    - name: Build Python Package\n      run: |\n        echo 'PKG = \"pip\"'>dvc/_build.py\n        uv build\n\n    - name: Check dist\n      run: uv tool run twine check --strict dist/*\n\n    - uses: actions/upload-artifact@v7\n      with:\n        name: Packages\n        path: dist/\n        if-no-files-found: error\n\n  test-pypi-publish:\n    name: Publish dev package to test.pypi.org\n    runs-on: ubuntu-latest\n    needs: build\n    if: ${{ github.event.action == 'published' || (github.repository == 'treeverse/dvc' && github.event_name == 'push' && github.ref == 'refs/heads/main') }}\n\n    environment:\n      name: test-pypi\n      url: https://test.pypi.org/p/dvc/${{ github.event.release.tag_name }}\n\n    permissions:\n      id-token: write\n\n    steps:\n    - uses: actions/download-artifact@v8\n      with:\n        name: Packages\n        path: dist\n\n    - name: Upload package to Test PyPI\n      uses: pypa/gh-action-pypi-publish@release/v1\n      with:\n        repository-url: https://test.pypi.org/legacy/\n        skip-existing: true\n\n  pypi-publish:\n    name: Publish released package to pypi.org\n    runs-on: ubuntu-latest\n    needs: build\n    if: github.event.action == 'published'\n\n    environment:\n      name: pypi\n      
url: https://pypi.org/p/dvc/${{ github.event.release.tag_name }}\n\n    permissions:\n      id-token: write\n\n    steps:\n    - uses: actions/download-artifact@v8\n      with:\n        name: Packages\n        path: dist\n\n    - name: Upload package to PyPI\n      uses: pypa/gh-action-pypi-publish@release/v1\n"
  },
  {
    "path": ".github/workflows/codeql.yml",
    "content": "name: \"CodeQL\"\n\non:\n  push:\n    branches: [\"main\"]\n  pull_request:\n    branches: [\"main\"]\n  schedule:\n    - cron: '32 19 * * 2'\n  workflow_dispatch:\n\npermissions:\n  contents: read\n\njobs:\n  analyze:\n    name: Analyze\n    runs-on: ubuntu-latest\n    permissions:\n      actions: read\n      contents: read\n      security-events: write\n\n    strategy:\n      fail-fast: false\n      matrix:\n        language: ['python']\n\n    steps:\n    - name: Checkout repository\n      uses: actions/checkout@v6\n\n    - name: Initialize CodeQL\n      uses: github/codeql-action/init@v4\n      with:\n        languages: ${{ matrix.language }}\n        queries: security-extended\n\n    - name: Perform CodeQL Analysis\n      uses: github/codeql-action/analyze@v4\n"
  },
  {
    "path": ".github/workflows/plugin_tests.yaml",
    "content": "name: Remote Plugin Tests\n\non:\n  pull_request:\n  workflow_dispatch:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}\n  cancel-in-progress: true\n\npermissions:\n  contents: read\n\njobs:\n  tests:\n    timeout-minutes: 45\n    runs-on: ${{ matrix.os }}\n    strategy:\n      fail-fast: false\n      matrix:\n        os: [ubuntu-latest]\n        pyv: [\"3.14\"]\n        plugin: [\"dvc-s3\"]\n\n    steps:\n    - uses: actions/checkout@v6\n      with:\n        path: dvc\n\n    - uses: actions/checkout@v6\n      with:\n        repository: treeverse/${{ matrix.plugin }}\n        ref: main\n        path: ${{ matrix.plugin }}\n\n    - name: Set up Python\n      uses: actions/setup-python@v6\n      with:\n        python-version: ${{ matrix.pyv }}\n\n    - uses: astral-sh/setup-uv@v7\n      with:\n        enable-cache: true\n        cache-dependency-glob: \"**/pyproject.toml\"\n        cache-suffix: ${{ matrix.pyv }}\n\n    - name: Install plugin + DVC@PR\n      run: |\n        uv pip install \"./dvc[testing]\" --system\n        uv pip install -e \"./${{ matrix.plugin }}[tests]\" --system\n\n    - name: Run plugin tests\n      timeout-minutes: 15\n      working-directory: ${{ matrix.plugin }}\n      run: pytest -v -n=auto\n"
  },
  {
    "path": ".github/workflows/tests.yaml",
    "content": "name: Tests\n\non:\n  push:\n    branches: [main]\n  pull_request:\n  schedule:\n    - cron: '5 1 * * *'  # every day at 01:05\n  workflow_dispatch:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}\n  cancel-in-progress: true\n\npermissions:\n  contents: read\n\njobs:\n  lint:\n    timeout-minutes: 10\n    runs-on: ${{ matrix.os }}\n    env:\n       DVC_NO_ANALYTICS: true\n    strategy:\n      fail-fast: false\n      matrix:\n        os: [ubuntu-latest, macos-latest, windows-latest]\n    steps:\n    - uses: actions/checkout@v6\n      with:\n        fetch-depth: 0\n    - name: Set up Python\n      uses: actions/setup-python@v6\n      with:\n        python-version: '3.14'\n    - uses: astral-sh/setup-uv@v7\n      with:\n        enable-cache: true\n        cache-dependency-glob: pyproject.toml\n    - run: uv pip install -e \".[dev]\" --system\n      env:\n        UV_LINK_MODE: ${{ runner.os == 'Windows' && 'symlink' || 'hardlink' }}\n\n    - name: Cache mypy\n      uses: actions/cache@v5\n      with:\n        path: .mypy_cache\n        key: mypy-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}\n    - uses: pre-commit/action@v3.0.1\n\n  tests:\n    timeout-minutes: 20\n    runs-on: ${{ matrix.os }}\n    strategy:\n      fail-fast: false\n      matrix:\n        os: [ubuntu-latest, macos-latest]\n        pyv: [\"3.9\", \"3.10\", \"3.11\", \"3.12\", \"3.13\", \"3.14\"]\n        include:\n        - os: windows-latest\n          pyv: \"3.9\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 1\"\n        - os: windows-latest\n          pyv: \"3.9\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 2\"\n        - os: windows-latest\n          pyv: \"3.9\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 3\"\n        - os: windows-latest\n          pyv: \"3.9\"\n          pytestargs: 
\"--splitting-algorithm=least_duration --splits 4 --group 4\"\n        - os: windows-latest\n          pyv: \"3.10\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 1\"\n        - os: windows-latest\n          pyv: \"3.10\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 2\"\n        - os: windows-latest\n          pyv: \"3.10\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 3\"\n        - os: windows-latest\n          pyv: \"3.10\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 4\"\n        - os: windows-latest\n          pyv: \"3.11\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 1\"\n        - os: windows-latest\n          pyv: \"3.11\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 2\"\n        - os: windows-latest\n          pyv: \"3.11\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 3\"\n        - os: windows-latest\n          pyv: \"3.11\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 4\"\n        - os: windows-latest\n          pyv: \"3.12\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 1\"\n        - os: windows-latest\n          pyv: \"3.12\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 2\"\n        - os: windows-latest\n          pyv: \"3.12\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 3\"\n        - os: windows-latest\n          pyv: \"3.12\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 4\"\n        - os: windows-latest\n          pyv: \"3.13\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 1\"\n        - os: windows-latest\n          pyv: \"3.13\"\n          pytestargs: \"--splitting-algorithm=least_duration 
--splits 4 --group 2\"\n        - os: windows-latest\n          pyv: \"3.13\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 3\"\n        - os: windows-latest\n          pyv: \"3.13\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 4\"\n        - os: windows-latest\n          pyv: \"3.14\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 1\"\n        - os: windows-latest\n          pyv: \"3.14\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 2\"\n        - os: windows-latest\n          pyv: \"3.14\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 3\"\n        - os: windows-latest\n          pyv: \"3.14\"\n          pytestargs: \"--splitting-algorithm=least_duration --splits 4 --group 4\"\n    steps:\n    - uses: actions/checkout@v6\n      with:\n        fetch-depth: 0\n    - name: Set up Python\n      uses: actions/setup-python@v6\n      with:\n        python-version: ${{ matrix.pyv }}\n    - uses: astral-sh/setup-uv@v7\n      with:\n        enable-cache: true\n        cache-dependency-glob: pyproject.toml\n        cache-suffix: ${{ matrix.pyv }}\n    - run: uv pip install -e \".[dev]\" pytest-split --system\n      env:\n        UV_LINK_MODE: ${{ runner.os == 'Windows' && 'symlink' || 'hardlink' }}\n\n    - name: run tests\n      timeout-minutes: 40\n      env:\n        COVERAGE_CORE: sysmon\n        PYTHONUTF8: 1\n      run: >\n        pytest ${{ matrix.pytestargs }} -n=logical --dist=worksteal --timeout=300 --durations=0\n        --cov --cov-report=xml --cov-report=term  --durations-path=./.github/.test_durations\n    - name: upload coverage report\n      uses: codecov/codecov-action@v5\n  check:\n    if: always()\n    needs: [tests]\n    runs-on: ubuntu-latest\n    steps:\n      - uses: re-actors/alls-green@release/v1\n        with:\n          jobs: ${{ toJSON(needs) }}\n  notify:\n    if: 
github.ref == 'refs/heads/main' && failure()\n    needs: [tests]\n    runs-on: ubuntu-latest\n    steps:\n    - name: Slack Notification\n      uses: rtCamp/action-slack-notify@v2.3.3\n      env:\n        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}\n        SLACK_COLOR: ${{ job.status }}\n        SLACK_MESSAGE: 'CI Failed on main :boom:'\n        SLACK_TITLE: CI Status\n        SLACK_USERNAME: DVC-CI\n"
  },
  {
    "path": ".gitignore",
    "content": "__pycache__/\nneatlynx.conf\n.idea\n/cache\n*.pyc\n\n.env*/\n.venv\nenv/\nvenv/\n.python-version\n\n.dvc.conf.lock\n.DS_Store\nbuild/\ndist/\n\n*.egg-info/\n.eggs\n\nhooks/*.pyc\n\n*.rpm\n*.deb\n\ninnosetup/config.ini\n\n*.exe\n\n.coverage\n.coverage.*\ncoverage.xml\n\n*.sw?\n\npip-wheel-metadata/\n.vscode/\n\nazurite\nenv.sh\ntests/remotes_env\nscripts/ci/gcp-creds.json\n.gcp-creds.json\n\n*~\n/dvc/_version.py\n\n.mypy_cache/\n.pytest_cache/\n.nox/\n.tox/\nhtmlcov/\n"
  },
  {
    "path": ".mailmap",
    "content": "Paweł Redzyński <pawelredzynski@gmail.com>\nDmitry Petrov <dmitry.petrov@nevesomo.com>\nEarl Hathaway <github@earlh.com>\nNabanita Dash <dashnabanita@gmail.com>\nKurian Benoy <kurian.bkk@gmail.com>\nSritanu Chakraborty <sritanu25@gmail.com>\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "default_language_version:\n  python: python3\n\nci:\n  skip: [mypy, dvc-pre-commit]\n\nrepos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v6.0.0\n    hooks:\n      - id: check-added-large-files\n      - id: check-case-conflict\n      - id: check-executables-have-shebangs\n      - id: check-json\n      - id: check-merge-conflict\n        exclude: \"tests/func/utils/test_strict_yaml.py\"\n        args: ['--assume-in-merge']\n      - id: check-toml\n      - id: check-yaml\n      - id: end-of-file-fixer\n      - id: mixed-line-ending\n      - id: sort-simple-yaml\n      - id: trailing-whitespace\n  - repo: https://github.com/astral-sh/ruff-pre-commit\n    rev: 'v0.14.13'\n    hooks:\n      - id: ruff-check\n        args: [--fix, --exit-non-zero-on-fix]\n      - id: ruff-format\n  - repo: https://github.com/codespell-project/codespell\n    rev: v2.4.1\n    hooks:\n      - id: codespell\n        additional_dependencies: [\"tomli\"]\n  - repo: local\n    hooks:\n    - id: mypy\n      name: mypy\n      entry: mypy\n      files: ^dvc/\n      language: system\n      types: [python]\n      require_serial: true\n  - hooks:\n      - args:\n          - git-hook\n          - pre-commit\n        entry: dvc\n        id: dvc-pre-commit\n        language: system\n        name: DVC pre-commit\n        stages:\n          - pre-commit\n        verbose: true\n        require_serial: true\n      - args:\n          - git-hook\n          - pre-push\n        entry: dvc\n        id: dvc-pre-push\n        language: system\n        name: DVC pre-push\n        stages:\n          - pre-push\n        require_serial: true\n      - always_run: true\n        args:\n          - git-hook\n          - post-checkout\n        entry: dvc\n        id: dvc-post-checkout\n        language: system\n        minimum_pre_commit_version: 2.2.0\n        name: DVC post-checkout\n        stages:\n          - post-checkout\n        require_serial: true\n    repo: local\n"
  },
  {
    "path": ".pre-commit-hooks.yaml",
    "content": "- args:\n  - git-hook\n  - pre-commit\n  entry: dvc\n  id: dvc-pre-commit\n  language: python\n  language_version: python3\n  name: DVC pre-commit\n  require_serial: true\n  stages:\n  - pre-commit\n  verbose: true\n- args:\n  - git-hook\n  - pre-push\n  entry: dvc\n  id: dvc-pre-push\n  language: python\n  language_version: python3\n  name: DVC pre-push\n  require_serial: true\n  stages:\n  - pre-push\n- always_run: true\n  args:\n  - git-hook\n  - post-checkout\n  entry: dvc\n  id: dvc-post-checkout\n  language: python\n  language_version: python3\n  minimum_pre_commit_version: 3.2.0\n  name: DVC post-checkout\n  require_serial: true\n  stages:\n  - post-checkout\n"
  },
  {
    "path": ".zenodo.json",
    "content": "{\n  \"title\": \"DVC: Data Version Control - Git for Data & Models\",\n  \"keywords\": [\n    \"data-science\", \"data-version-control\", \"machine-learning\", \"git\",\n    \"developer-tools\", \"reproducibility\", \"collaboration\", \"ai\", \"python\"],\n  \"contributors\": [\n    {\"name\": \"DVC team\", \"type\": \"Other\", \"affiliation\": \"Treeverse\"}]\n}\n"
  },
  {
    "path": "CITATION.cff",
    "content": "cff-version: 1.2.0\ntitle: 'DVC: Data Version Control - Git for Data & Models\"'\nmessage: >-\n  If you use this software as part of a publication and wish\n  to cite it, please use the metadata from this file.\ntype: software\nauthors:\n  - name: The DVC team and contributors\n    website: 'https://github.com/treeverse'\nrepository-code: 'https://github.com/treeverse/dvc'\nurl: 'https://dvc.org'\nabstract: >-\n  DVC is a tool for data science that takes advantage of existing software engineering toolset.\n  It helps machine learning teams manage large datasets, make projects reproducible, and collaborate better.\nkeywords:\n  - ai\n  - collaboration\n  - data-science\n  - data-version-control\n  - developer-tools\n  - git\n  - machine-learning\n  - python\n  - reproducibility\nlicense: Apache-2.0\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontributors and maintainers pledge to making participation in our project and\nour community a harassment-free experience for everyone, regardless of age, body\nsize, disability, ethnicity, sex characteristics, gender identity and expression,\nlevel of experience, education, socio-economic status, nationality, personal\nappearance, race, religion, or sexual identity and orientation.\n\n## Our Standards\n\nExamples of behavior that contributes to creating a positive environment\ninclude:\n\n* Using welcoming and inclusive language\n* Being respectful of differing viewpoints and experiences\n* Gracefully accepting constructive criticism\n* Focusing on what is best for the community\n* Showing empathy towards other community members\n\nExamples of unacceptable behavior by participants include:\n\n* The use of sexualized language or imagery and unwelcome sexual attention or\n  advances\n* Trolling, insulting/derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or electronic\n  address, without explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Our Responsibilities\n\nProject maintainers are responsible for clarifying the standards of acceptable\nbehavior and are expected to take appropriate and fair corrective action in\nresponse to any instances of unacceptable behavior.\n\nProject maintainers have the right and responsibility to remove, edit, or\nreject comments, commits, code, wiki edits, issues, and other contributions\nthat are not aligned to this Code of Conduct, or to ban temporarily or\npermanently any contributor for other behaviors that they deem inappropriate,\nthreatening, offensive, or harmful.\n\n## Scope\n\nThis Code of Conduct applies both 
within project spaces and in public spaces\nwhen an individual is representing the project or its community. Examples of\nrepresenting a project or community include using an official project e-mail\naddress, posting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event. Representation of a project may be\nfurther defined and clarified by project maintainers.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported by contacting the project team at info@dvc.org. All\ncomplaints will be reviewed and investigated and will result in a response that\nis deemed necessary and appropriate to the circumstances. The project team is\nobligated to maintain confidentiality with regard to the reporter of an incident.\nFurther details of specific enforcement policies may be posted separately.\n\nProject maintainers who do not follow or enforce the Code of Conduct in good\nfaith may face temporary or permanent repercussions as determined by other\nmembers of the project's leadership.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,\navailable at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html\n\n[homepage]: https://www.contributor-covenant.org\n\nFor answers to common questions about this code of conduct, see\nhttps://www.contributor-covenant.org/faq\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "### See our contribution guide at [dvc.org](https://dvc.org/doc/user-guide/contributing/core).\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"{}\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2025 Treeverse.\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.rst",
    "content": "|Banner|\n\n`Website <https://dvc.org>`_\n• `Docs <https://dvc.org/doc>`_\n• `Blog <http://blog.dataversioncontrol.com>`_\n• `Tutorial <https://dvc.org/doc/get-started>`_\n• `Related Technologies <https://dvc.org/doc/user-guide/related-technologies>`_\n• `How DVC works`_\n• `VS Code Extension`_\n• `Installation`_\n• `Contributing`_\n• `Community and Support`_\n\n|CI| |Python Version| |Coverage| |VS Code| |DOI|\n\n|PyPI| |PyPI Downloads| |Packages| |Brew| |Conda| |Choco| |Snap|\n\n|\n\n**Data Version Control** or **DVC** is a command line tool and `VS Code Extension`_ to help you develop reproducible machine learning projects:\n\n#. **Version** your data and models.\n   Store them in your cloud storage but keep their version info in your Git repo.\n\n#. **Iterate** fast with lightweight pipelines.\n   When you make changes, only run the steps impacted by those changes.\n\n#. **Track** experiments in your local Git repo (no servers needed).\n\n#. **Compare** any data, code, parameters, model, or performance plots.\n\n#. 
**Share** experiments and automatically reproduce anyone's experiment.\n\nQuick start\n===========\n\n    Please read our `Command Reference <https://dvc.org/doc/command-reference>`_ for a complete list.\n\nA common CLI workflow includes:\n\n\n+-----------------------------------+----------------------------------------------------------------------------------------------------+\n| Task                              | Terminal                                                                                           |\n+===================================+====================================================================================================+\n| Track data                        | | ``$ git add train.py params.yaml``                                                               |\n|                                   | | ``$ dvc add images/``                                                                            |\n+-----------------------------------+----------------------------------------------------------------------------------------------------+\n| Connect code and data             | | ``$ dvc stage add -n featurize -d images/ -o features/ python featurize.py``                     |\n|                                   | | ``$ dvc stage add -n train -d features/ -d train.py -o model.p -M metrics.json python train.py`` |\n+-----------------------------------+----------------------------------------------------------------------------------------------------+\n| Make changes and experiment       | | ``$ dvc exp run -n exp-baseline``                                                                |\n|                                   | | ``$ vi train.py``                                                                                |\n|                                   | | ``$ dvc exp run -n exp-code-change``                                                             
|\n+-----------------------------------+----------------------------------------------------------------------------------------------------+\n| Compare and select experiments    | | ``$ dvc exp show``                                                                               |\n|                                   | | ``$ dvc exp apply exp-baseline``                                                                 |\n+-----------------------------------+----------------------------------------------------------------------------------------------------+\n| Share code                        | | ``$ git add .``                                                                                  |\n|                                   | | ``$ git commit -m 'The baseline model'``                                                         |\n|                                   | | ``$ git push``                                                                                   |\n+-----------------------------------+----------------------------------------------------------------------------------------------------+\n| Share data and ML models          | | ``$ dvc remote add myremote -d s3://mybucket/image_cnn``                                         |\n|                                   | | ``$ dvc push``                                                                                   |\n+-----------------------------------+----------------------------------------------------------------------------------------------------+\n\nHow DVC works\n=============\n\n    We encourage you to read our `Get Started\n    <https://dvc.org/doc/get-started>`_ docs to better understand what DVC\n    does and how it can fit your scenarios.\n\nThe closest *analogies* to describe the main DVC features are these:\n\n#. **Git for data**: Store and share data artifacts (like Git-LFS but without a server) and models, connecting them with a Git repository. Data management meets GitOps!\n#. 
**Makefiles** for ML: Describes how data or model artifacts are built from other data and code in a standard format. Now you can version your data pipelines with Git.\n#. Local **experiment tracking**: Turn your machine into an ML experiment management platform, and collaborate with others using existing Git hosting (GitHub, GitLab, etc.).\n\nGit is employed as usual to store and version code (including DVC meta-files as placeholders for data).\nDVC `stores data and model files <https://dvc.org/doc/start/data-management>`_ seamlessly in a cache outside of Git, while preserving almost the same user experience as if they were in the repo.\nTo share and back up the *data cache*, DVC supports multiple remote storage platforms - any cloud (S3, Azure, Google Cloud, etc.) or on-premise network storage (via SSH, for example).\n\n|Flowchart|\n\n`DVC pipelines <https://dvc.org/doc/start/data-management/data-pipelines>`_ (computational graphs) connect code and data together.\nThey specify all steps required to produce a model: input dependencies including code, data, commands to run; and output information to be saved.\n\nLast but not least, `DVC Experiment Versioning <https://dvc.org/doc/start/experiments>`_ lets you prepare and run a large number of experiments.\nTheir results can be filtered and compared based on hyperparameters and metrics, and visualized with multiple plots.\n\n.. _`VS Code Extension`:\n\nVS Code Extension\n=================\n\n|VS Code|\n\nTo use DVC as a GUI right from your VS Code IDE, install the `DVC Extension <https://marketplace.visualstudio.com/items?itemName=Iterative.dvc>`_ from the Marketplace.\nIt currently features experiment tracking and data management, and more features (data pipeline support, etc.) are coming soon!\n\n|VS Code Extension Overview|\n\n    Note: You'll have to install core DVC on your system separately (as detailed\n    below). 
The Extension will guide you if needed.\n\nInstallation\n============\n\nThere are several ways to install DVC: in VS Code; using ``snap``, ``choco``, ``brew``, ``conda``, ``pip``; or with an OS-specific package.\nFull instructions are `available here <https://dvc.org/doc/get-started/install>`_.\n\nSnapcraft (Linux)\n-----------------\n\n|Snap|\n\n.. code-block:: bash\n\n   snap install dvc --classic\n\nThis corresponds to the latest tagged release.\nAdd ``--beta`` for the latest tagged release candidate, or ``--edge`` for the latest ``main`` version.\n\nChocolatey (Windows)\n--------------------\n\n|Choco|\n\n.. code-block:: bash\n\n   choco install dvc\n\nBrew (mac OS)\n-------------\n\n|Brew|\n\n.. code-block:: bash\n\n   brew install dvc\n\nAnaconda (Any platform)\n-----------------------\n\n|Conda|\n\n.. code-block:: bash\n\n   conda install -c conda-forge mamba # installs much faster than conda\n   mamba install -c conda-forge dvc\n\nDepending on the remote storage type you plan to use to keep and share your data, you might need to install optional dependencies: ``dvc-s3``, ``dvc-azure``, ``dvc-gdrive``, ``dvc-gs``, ``dvc-oss``, ``dvc-ssh``.\n\nPyPI (Python)\n-------------\n\n|PyPI|\n\n.. code-block:: bash\n\n   pip install dvc\n\nDepending on the remote storage type you plan to use to keep and share your data, you might need to specify one of the optional dependencies: ``s3``, ``gs``, ``azure``, ``oss``, ``ssh``. Or ``all`` to include them all.\nThe command should look like this: ``pip install 'dvc[s3]'`` (in this case AWS S3 dependencies such as ``boto3`` will be installed automatically).\n\nTo install the development version, run:\n\n.. 
code-block:: bash\n\n   pip install \"dvc @ git+https://github.com/treeverse/dvc\"\n\nPackage (Platform-specific)\n---------------------------\n\n|Packages|\n\nSelf-contained packages for Linux, Windows, and Mac are available.\nThe latest version of the packages can be found on the GitHub `releases page <https://github.com/treeverse/dvc/releases>`_.\n\nUbuntu / Debian (deb)\n^^^^^^^^^^^^^^^^^^^^^\n.. code-block:: bash\n\n   sudo wget https://dvc.org/deb/dvc.list -O /etc/apt/sources.list.d/dvc.list\n   wget -qO - https://dvc.org/deb/iterative.asc | sudo apt-key add -\n   sudo apt update\n   sudo apt install dvc\n\nFedora / CentOS (rpm)\n^^^^^^^^^^^^^^^^^^^^^\n.. code-block:: bash\n\n   sudo wget https://dvc.org/rpm/dvc.repo -O /etc/yum.repos.d/dvc.repo\n   sudo rpm --import https://dvc.org/rpm/iterative.asc\n   sudo yum update\n   sudo yum install dvc\n\nContributing\n============\n\nContributions are welcome!\nPlease see our `Contributing Guide <https://dvc.org/doc/user-guide/contributing/core>`_ for more details.\nThanks to all our contributors!\n\n|Contribs|\n\nCommunity and Support\n=====================\n\n* `Twitter <https://twitter.com/DVCorg>`_\n* `Forum <https://discuss.dvc.org/>`_\n* `Discord Chat <https://dvc.org/chat>`_\n* `Email <mailto:support@dvc.org>`_\n* `Mailing List <https://dvc.org/community#subscribe>`_\n\nCopyright\n=========\n\nThis project is distributed under the Apache license version 2.0 (see the LICENSE file in the project root).\n\nBy submitting a pull request to this project, you agree to license your contribution under the Apache license version 2.0 to this project.\n\nCitation\n========\n\n|DOI|\n\nIterative, *DVC: Data Version Control - Git for Data & Models* (2020)\n`DOI:10.5281/zenodo.3677553 <https://doi.org/10.5281/zenodo.3677553>`_.\n\nBarrak, A., Eghan, E.E. and Adams, B. 
`On the Co-evolution of ML Pipelines and Source Code - Empirical Study of DVC Projects <https://mcis.cs.queensu.ca/publications/2021/saner.pdf>`_ , in Proceedings of the 28th IEEE International Conference on Software Analysis, Evolution, and Reengineering, SANER 2021. Hawaii, USA.\n\n\n.. |Banner| image:: https://dvc.org/img/logo-github-readme.png\n   :target: https://dvc.org\n   :alt: DVC logo\n\n.. |VS Code Extension Overview| image:: https://raw.githubusercontent.com/treeverse/vscode-dvc/main/extension/docs/overview.gif\n   :alt: DVC Extension for VS Code\n\n.. |CI| image:: https://github.com/treeverse/dvc/actions/workflows/tests.yaml/badge.svg\n   :target: https://github.com/treeverse/dvc/actions/workflows/tests.yaml\n   :alt: GHA Tests\n\n.. |Python Version| image:: https://img.shields.io/pypi/pyversions/dvc\n   :target: https://pypi.org/project/dvc\n   :alt: Python Version\n\n.. |Coverage| image:: https://codecov.io/gh/treeverse/dvc/branch/main/graph/badge.svg\n   :target: https://codecov.io/gh/treeverse/dvc\n   :alt: Codecov\n\n.. |Snap| image:: https://img.shields.io/badge/snap-install-82BEA0.svg?logo=snapcraft\n   :target: https://snapcraft.io/dvc\n   :alt: Snapcraft\n\n.. |Choco| image:: https://img.shields.io/chocolatey/v/dvc?label=choco\n   :target: https://chocolatey.org/packages/dvc\n   :alt: Chocolatey\n\n.. |Brew| image:: https://img.shields.io/homebrew/v/dvc?label=brew\n   :target: https://formulae.brew.sh/formula/dvc\n   :alt: Homebrew\n\n.. |Conda| image:: https://anaconda.org/conda-forge/dvc/badges/version.svg\n   :target: https://anaconda.org/conda-forge/dvc\n   :alt: Conda-forge\n\n.. |PyPI| image:: https://img.shields.io/pypi/v/dvc.svg?label=pip&logo=PyPI&logoColor=white\n   :target: https://pypi.org/project/dvc\n   :alt: PyPI\n\n.. |PyPI Downloads| image:: https://img.shields.io/pypi/dm/dvc.svg?color=blue&label=Downloads&logo=pypi&logoColor=gold\n   :target: https://pypi.org/project/dvc\n   :alt: PyPI Downloads\n\n.. 
|Packages| image:: https://img.shields.io/badge/deb|pkg|rpm|exe-blue\n   :target: https://dvc.org/doc/install\n   :alt: deb|pkg|rpm|exe\n\n.. |DOI| image:: https://img.shields.io/badge/DOI-10.5281/zenodo.3677553-blue.svg\n   :target: https://doi.org/10.5281/zenodo.3677553\n   :alt: DOI\n\n.. |Flowchart| image:: https://dvc.org/img/flow.gif\n   :target: https://dvc.org/img/flow.gif\n   :alt: how_dvc_works\n\n.. |Contribs| image:: https://contrib.rocks/image?repo=treeverse/dvc\n   :target: https://github.com/treeverse/dvc/graphs/contributors\n   :alt: Contributors\n\n.. |VS Code| image:: https://img.shields.io/visual-studio-marketplace/v/Iterative.dvc?color=blue&label=VSCode&logo=visualstudiocode&logoColor=blue\n   :target: https://marketplace.visualstudio.com/items?itemName=Iterative.dvc\n   :alt: VS Code Extension\n"
  },
  {
    "path": "dvc/__init__.py",
    "content": "\"\"\"\nDVC\n----\nMake your data science projects reproducible and shareable.\n\"\"\"\n\nimport dvc.logger\nfrom dvc.build import PKG  # noqa: F401\nfrom dvc.version import __version__, version_tuple  # noqa: F401\n\ndvc.logger.setup()\n"
  },
  {
    "path": "dvc/__main__.py",
    "content": "\"\"\"Main entry point for DVC command line tool.\"\"\"\n\nimport sys\n\nfrom dvc.cli import main\n\nif __name__ == \"__main__\":\n    sys.exit(main(sys.argv[1:]))\n"
  },
  {
    "path": "dvc/__pyinstaller/__init__.py",
    "content": "import os\n\n\ndef get_hook_dirs():\n    return [os.path.dirname(__file__)]\n"
  },
  {
    "path": "dvc/__pyinstaller/hook-asyncssh.py",
    "content": "# ruff: noqa: N999\nhiddenimports = [\"win32timezone\"]\n"
  },
  {
    "path": "dvc/__pyinstaller/hook-celery.py",
    "content": "# ruff: noqa: N999\n\nfrom PyInstaller.utils.hooks import collect_submodules, is_module_or_submodule\n\n# Celery dynamically imports most celery internals at runtime\n# pyinstaller hook must expose all modules loaded by\n# kombu.utils.imports:symbol_by_name()\n_EXCLUDES = (\"celery.bin\", \"celery.contrib\")\nhiddenimports = collect_submodules(\n    \"celery\",\n    filter=lambda name: not any(\n        is_module_or_submodule(name, module) for module in _EXCLUDES\n    ),\n)\n"
  },
  {
    "path": "dvc/__pyinstaller/hook-dvc.py",
    "content": "# ruff: noqa: N999\nfrom PyInstaller.utils.hooks import copy_metadata\n\n# needed for `dvc doctor` to show dep versions\ndatas = copy_metadata(\"adlfs\", recursive=True)\ndatas += copy_metadata(\"knack\")\ndatas += copy_metadata(\"gcsfs\")\ndatas += copy_metadata(\"pyarrow\")\ndatas += copy_metadata(\"pydrive2\")\ndatas += copy_metadata(\"s3fs\", recursive=True)\ndatas += copy_metadata(\"ossfs\")\ndatas += copy_metadata(\"sshfs\")\ndatas += copy_metadata(\"webdav4\")\ndatas += copy_metadata(\"aiohttp\")\ndatas += copy_metadata(\"aiohttp_retry\")\n\nhiddenimports = [\n    \"dvc_azure\",\n    \"dvc_gdrive\",\n    \"dvc_gs\",\n    \"dvc_hdfs\",\n    \"dvc_oss\",\n    \"dvc_s3\",\n    \"dvc_ssh\",\n    \"dvc_webdav\",\n    \"dvc_webhdfs\",\n    # https://github.com/pypa/setuptools/issues/1963\n    \"pkg_resources.py2_warn\",\n]\n"
  },
  {
    "path": "dvc/__pyinstaller/hook-dvc.utils.flatten.py",
    "content": "# ruff: noqa: N999\nfrom PyInstaller.utils.hooks import copy_metadata\n\ndatas = copy_metadata(\"flatten-dict\")\n"
  },
  {
    "path": "dvc/__pyinstaller/hook-dvc_task.py",
    "content": "# ruff: noqa: N999\n\nfrom PyInstaller.utils.hooks import collect_submodules\n\nhiddenimports = collect_submodules(\"dvc_task\")\n"
  },
  {
    "path": "dvc/__pyinstaller/hook-fsspec.py",
    "content": "# ruff: noqa: N999\nhiddenimports = [\"fsspec.implementations.memory\"]\n"
  },
  {
    "path": "dvc/__pyinstaller/hook-pydrive2.py",
    "content": "# ruff: noqa: N999\nfrom PyInstaller.utils.hooks import copy_metadata\n\ndatas = copy_metadata(\"pydrive2\")\n"
  },
  {
    "path": "dvc/_debug.py",
    "content": "from contextlib import ExitStack, contextmanager\nfrom datetime import datetime\nfrom typing import TYPE_CHECKING, Callable, Optional, Union\n\nif TYPE_CHECKING:\n    from argparse import Namespace\n    from types import FrameType\n\n\n@contextmanager\ndef viztracer_profile(\n    path: Union[Callable[[], str], str],\n    depth: int = -1,\n    log_async: bool = False,\n):\n    try:\n        import viztracer  # ty: ignore[unresolved-import]\n    except ImportError:\n        print(\"Failed to run profiler, viztracer is not installed\")  # noqa: T201\n        yield\n        return\n\n    tracer = viztracer.VizTracer(max_stack_depth=depth, log_async=log_async)\n\n    tracer.start()\n    try:\n        yield\n    finally:\n        tracer.stop()\n        tracer.save(path() if callable(path) else path)\n\n\n@contextmanager\ndef yappi_profile(\n    path: Optional[Union[Callable[[], str], str]] = None,\n    wall_clock: Optional[bool] = True,\n    separate_threads: Optional[bool] = False,\n):\n    try:\n        import yappi  # ty: ignore[unresolved-import]\n    except ImportError:\n        print(\"Failed to run profiler, yappi is not installed\")  # noqa: T201\n        yield\n        return\n\n    yappi.set_clock_type(\"wall\" if wall_clock else \"cpu\")\n\n    yappi.start()\n    try:\n        yield\n    finally:\n        yappi.stop()\n\n        threads = yappi.get_thread_stats()\n        stats = {}\n        if separate_threads:\n            for thread in threads:\n                ctx_id = thread.id\n                stats[ctx_id] = yappi.get_func_stats(ctx_id=ctx_id)\n        else:\n            stats[None] = yappi.get_func_stats()\n\n        fpath = path() if callable(path) else path\n        for ctx_id, st in stats.items():\n            if fpath:\n                out = f\"{fpath}-{ctx_id}\" if ctx_id is not None else fpath\n                st.save(out, type=\"callgrind\")\n            else:\n                if ctx_id is not None:\n                    
print(f\"\\nThread {ctx_id}\")  # noqa: T201\n                st.print_all()\n                if ctx_id is None:\n                    threads.print_all()\n\n        yappi.clear_stats()\n\n\n@contextmanager\ndef instrument(html_output=False):\n    \"\"\"Run a statistical profiler\"\"\"\n    try:\n        from pyinstrument import Profiler  # ty: ignore[unresolved-import]\n    except ImportError:\n        print(\"Failed to run profiler, pyinstrument is not installed\")  # noqa: T201\n        yield\n        return\n\n    profiler = Profiler()\n\n    profiler.start()\n    try:\n        yield\n    finally:\n        profiler.stop()\n\n        if html_output:\n            profiler.open_in_browser()\n        else:\n            print(profiler.output_text(unicode=True, color=True))  # noqa: T201\n\n\n@contextmanager\ndef profile(dump_path: Optional[str] = None):\n    \"\"\"Run a cprofile\"\"\"\n    import cProfile\n\n    prof = cProfile.Profile()\n    prof.enable()\n\n    try:\n        yield\n    finally:\n        prof.disable()\n        if dump_path:\n            prof.dump_stats(dump_path)\n        else:\n            prof.print_stats(sort=\"cumtime\")\n\n\n@contextmanager\ndef debug():\n    try:\n        yield\n    except Exception:\n        try:\n            import ipdb as pdb  # noqa: T100  # ty: ignore[unresolved-import]\n        except ImportError:\n            import pdb  # type: ignore[no-redef]  # noqa: T100\n        pdb.post_mortem()\n\n        raise  # prevent from jumping ahead\n\n\ndef _sigshow(_, frame: Optional[\"FrameType\"]) -> None:\n    import sys\n    from shutil import get_terminal_size\n    from traceback import format_stack\n\n    lines = \"\\u2015\" * get_terminal_size().columns\n    stack = format_stack(frame)\n    print(lines, \"\\n\", *stack, lines, sep=\"\", file=sys.stderr)  # noqa: T201\n\n\n@contextmanager\ndef show_stack():\n    r\"\"\"Show stack trace on SIGQUIT (Ctrl-\\) or SIGINFO (Ctrl-T on macOS).\"\"\"\n    import signal\n    import 
sys\n\n    if sys.platform != \"win32\":\n        signal.signal(signal.SIGQUIT, _sigshow)\n\n    try:\n        # only available on macOS\n        signal.signal(signal.SIGINFO, _sigshow)  # type: ignore[attr-defined]\n    except AttributeError:\n        pass\n    yield\n\n\ndef _get_path_func(tool: str, ext: str):\n    fmt = f\"{tool}.dvc-{{now:%Y%m%d}}_{{now:%H%M%S}}.{ext}\"\n\n    def func(now: Optional[\"datetime\"] = None) -> str:\n        return fmt.format(now=now or datetime.now())  # noqa: DTZ005\n\n    return func\n\n\n@contextmanager\ndef debugtools(args: Optional[\"Namespace\"] = None, **kwargs):\n    kw = vars(args) if args else {}\n    kw.update(kwargs)\n\n    with ExitStack() as stack:\n        if kw.get(\"pdb\"):\n            stack.enter_context(debug())\n        if kw.get(\"cprofile\") or kw.get(\"cprofile_dump\"):\n            stack.enter_context(profile(kw.get(\"cprofile_dump\")))\n        if kw.get(\"instrument\") or kw.get(\"instrument_open\"):\n            stack.enter_context(instrument(kw.get(\"instrument_open\", False)))\n        if kw.get(\"show_stack\", False):\n            stack.enter_context(show_stack())\n        if kw.get(\"yappi\"):\n            path_func = _get_path_func(\"callgrind\", \"out\")\n            stack.enter_context(\n                yappi_profile(\n                    path=path_func,\n                    separate_threads=kw.get(\"yappi_separate_threads\"),\n                )\n            )\n        if (\n            kw.get(\"viztracer\")\n            or kw.get(\"viztracer_depth\")\n            or kw.get(\"viztracer_async\")\n        ):\n            path_func = _get_path_func(\"viztracer\", \"json\")\n            depth = kw.get(\"viztracer_depth\") or -1\n            log_async = kw.get(\"viztracer_async\") or False\n            prof = viztracer_profile(path=path_func, depth=depth, log_async=log_async)\n            stack.enter_context(prof)\n        yield\n\n\ndef add_debugging_flags(parser):\n    from argparse import 
SUPPRESS\n\n    # For detailed info see:\n    # https://github.com/treeverse/dvc/wiki/Debugging,-Profiling-and-Benchmarking-DVC\n    args, _ = parser.parse_known_args()\n    verbose = args.verbose\n\n    def debug_help(msg):\n        if verbose:\n            return msg\n        return SUPPRESS\n\n    parser = parser.add_argument_group(\"debug options\")\n\n    parser.add_argument(\n        \"--cprofile\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\"Generate cprofile data for tools like snakeviz / tuna\"),\n    )\n    parser.add_argument(\n        \"--cprofile-dump\", help=debug_help(\"Location to dump cprofile file\")\n    )\n    parser.add_argument(\n        \"--yappi\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\n            \"Generate a callgrind file for use with tools like \"\n            \"kcachegrind / qcachegrind\"\n        ),\n    )\n    parser.add_argument(\n        \"--yappi-separate-threads\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\"Generate one callgrind file per thread\"),\n    )\n    parser.add_argument(\n        \"--viztracer\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\"Generate a viztracer file for use with vizviewer\"),\n    )\n    parser.add_argument(\n        \"--viztracer-depth\",\n        type=int,\n        help=debug_help(\"Set viztracer maximum stack depth\"),\n    )\n    parser.add_argument(\n        \"--viztracer-async\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\"Treat async tasks as threads\"),\n    )\n    parser.add_argument(\n        \"--pdb\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\"Drop into the pdb/ipdb debugger on any exception\"),\n    )\n    parser.add_argument(\n        \"--instrument\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\"Use pyinstrument 
CLI profiler\"),\n    )\n    parser.add_argument(\n        \"--instrument-open\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\"Use pyinstrument web profiler\"),\n    )\n    parser.add_argument(\n        \"--show-stack\",\n        \"--ss\",\n        action=\"store_true\",\n        default=False,\n        help=debug_help(\n            r\"Use Ctrl+T on macOS or Ctrl+\\ on Linux to print the stack \"\n            \"frame currently executing. Unavailable on Windows.\"\n        ),\n    )\n"
  },
  {
    "path": "dvc/analytics.py",
    "content": "import json\nimport os\nfrom typing import TYPE_CHECKING, Optional\n\nfrom dvc.log import logger\n\nfrom .env import DVC_ANALYTICS_ENDPOINT, DVC_NO_ANALYTICS\n\nif TYPE_CHECKING:\n    from dvc.scm import Base\n\nlogger = logger.getChild(__name__)\n\n\ndef collect_and_send_report(args=None, return_code=None):\n    \"\"\"\n    Collect information from the runtime/environment and the command\n    being executed into a report and send it over the network.\n\n    To prevent analytics from blocking the execution of the main thread,\n    sending the report is done in a separate process.\n\n    The inter-process communication happens through a file containing the\n    report as a JSON, where the _collector_ generates it and the _sender_\n    removes it after sending it.\n    \"\"\"\n    import tempfile\n\n    from dvc.daemon import daemon\n\n    report = {}\n\n    # Include command execution information on the report only when available.\n    if args and hasattr(args, \"func\"):\n        report.update({\"cmd_class\": args.func.__name__})\n\n    if return_code is not None:\n        report.update({\"cmd_return_code\": return_code})\n\n    with tempfile.NamedTemporaryFile(delete=False, mode=\"w\") as fobj:\n        json.dump(report, fobj)\n\n    logger.trace(\"Saving analytics report to %s\", fobj.name)\n    daemon([\"analytics\", fobj.name])\n\n\ndef is_enabled():\n    from dvc.config import Config, to_bool\n    from dvc.utils import env2bool\n\n    if env2bool(\"DVC_TEST\"):\n        return False\n\n    enabled = not os.getenv(DVC_NO_ANALYTICS)\n    if enabled:\n        enabled = to_bool(\n            Config.from_cwd(validate=False).get(\"core\", {}).get(\"analytics\", \"true\")\n        )\n\n    logger.debug(\"Analytics is %sabled.\", \"en\" if enabled else \"dis\")\n\n    return enabled\n\n\ndef send(path):\n    \"\"\"\n    Side effect: Removes the report after sending it.\n\n    The report is generated and stored in a temporary file, see:\n    
`collect_and_send_report`. Sending happens on another process,\n    thus, the need of removing such file afterwards.\n    \"\"\"\n    import requests\n    from requests.exceptions import RequestException\n\n    url = os.environ.get(DVC_ANALYTICS_ENDPOINT, \"https://analytics.dvc.org\")\n    headers = {\"content-type\": \"application/json\"}\n\n    with open(path, encoding=\"utf-8\") as fobj:\n        report = json.load(fobj)\n\n    report.update(_runtime_info())\n\n    logger.debug(\"uploading report to %s\", url)\n    logger.trace(\"Sending %s to %s\", report, url)\n\n    try:\n        requests.post(url, json=report, headers=headers, timeout=5)\n    except RequestException as e:\n        logger.trace(\"\", exc_info=True)\n        logger.debug(\"failed to send analytics report %s\", str(e))\n\n    logger.trace(\"removing report %s\", path)\n    os.remove(path)\n\n\ndef _git_remote_url(scm: Optional[\"Base\"]) -> Optional[str]:\n    from dvc.scm import Git\n\n    if not isinstance(scm, Git):\n        return None\n\n    from dulwich.porcelain import get_remote_repo\n\n    dulwich_repo = scm.dulwich.repo\n    try:\n        _remote, url = get_remote_repo(dulwich_repo)\n    except IndexError:\n        # IndexError happens when the head is detached\n        _remote, url = get_remote_repo(dulwich_repo, b\"origin\")\n    # Dulwich returns (None, \"origin\") if no remote set\n    if (_remote, url) == (None, \"origin\"):\n        return None\n    return url\n\n\ndef _scm_in_use(scm: Optional[\"Base\"]) -> Optional[str]:\n    return type(scm).__name__ if scm else None\n\n\ndef _parse_git_remote_path(remote_url: str) -> str:\n    from urllib.parse import urlparse\n\n    from scmrepo.urls import is_scp_style_url\n\n    parsed = urlparse(remote_url)\n    # Windows Path also gets parsed with a drive letter as scheme\n    # https://github.com/python/cpython/issues/86381\n    if parsed.scheme and parsed.scheme in (\"http\", \"https\", \"git\", \"ssh\"):\n        return 
parsed.path.strip(\"/\")\n\n    if is_scp_style_url(remote_url):\n        # handle scp-style URL\n        parts = remote_url.split(\":\", 1)\n        if len(parts) == 2:\n            _, path = parts\n            return path.rstrip(\"/\")\n    return remote_url\n\n\ndef _git_remote_path_hash(scm: Optional[\"Base\"]) -> Optional[str]:\n    \"\"\"Return a hash of the git remote path.\"\"\"\n    import hashlib\n\n    try:\n        if remote_url := _git_remote_url(scm):\n            path = _parse_git_remote_path(remote_url)\n            h = hashlib.md5(path.encode(\"utf-8\"), usedforsecurity=False)  # for FIPS\n            return h.hexdigest()\n    except Exception:\n        logger.debug(\"Failed to get git remote path\", exc_info=True)\n    return None\n\n\ndef _runtime_info():\n    \"\"\"\n    Gather information from the environment where DVC runs to fill a report.\n    \"\"\"\n    from iterative_telemetry import _generate_ci_id, find_or_create_user_id\n\n    from dvc import __version__\n    from dvc.info import _get_remotes\n    from dvc.repo import Repo\n    from dvc.utils import is_binary\n\n    ci_id = _generate_ci_id()\n    if ci_id:\n        group_id, user_id = ci_id\n    else:\n        group_id, user_id = None, find_or_create_user_id()\n\n    scm = None\n    remotes = None\n    try:\n        repo = Repo()\n        scm = repo.scm\n        remotes = _get_remotes(repo.config)\n    except Exception as exc:  # noqa: BLE001\n        logger.debug(\"failed to open repo: %s\", exc)\n\n    return {\n        \"dvc_version\": __version__,\n        \"is_binary\": is_binary(),\n        \"scm_class\": _scm_in_use(scm),\n        \"system_info\": _system_info(),\n        \"user_id\": user_id,\n        \"group_id\": group_id,\n        \"remotes\": remotes,\n        \"git_remote_hash\": _git_remote_path_hash(scm),\n    }\n\n\ndef _system_info():\n    import platform\n    import sys\n\n    import distro\n\n    system = platform.system()\n\n    if system == \"Windows\":\n        
version = sys.getwindowsversion()  # type: ignore[attr-defined]\n\n        return {\n            \"os\": \"windows\",\n            \"windows_version_build\": version.build,\n            \"windows_version_major\": version.major,\n            \"windows_version_minor\": version.minor,\n            \"windows_version_service_pack\": version.service_pack,\n        }\n\n    if system == \"Darwin\":\n        return {\"os\": \"mac\", \"mac_version\": platform.mac_ver()[0]}\n\n    if system == \"Linux\":\n        return {\n            \"os\": \"linux\",\n            \"linux_distro\": distro.id(),\n            \"linux_distro_like\": distro.like(),\n            \"linux_distro_version\": distro.version(),\n        }\n\n    # We don't collect data for any other system.\n    raise NotImplementedError\n"
  },
  {
    "path": "dvc/annotations.py",
    "content": "from dataclasses import asdict, dataclass, field, fields\nfrom typing import Any, ClassVar, Optional\n\nfrom funcy import compact\nfrom voluptuous import Required\n\n\n@dataclass\nclass Annotation:\n    PARAM_DESC: ClassVar[str] = \"desc\"\n    PARAM_TYPE: ClassVar[str] = \"type\"\n    PARAM_LABELS: ClassVar[str] = \"labels\"\n    PARAM_META: ClassVar[str] = \"meta\"\n\n    desc: Optional[str] = None\n    type: Optional[str] = None\n    labels: list[str] = field(default_factory=list)\n    meta: dict[str, Any] = field(default_factory=dict)\n\n    def to_dict(self) -> dict[str, str]:\n        return compact(asdict(self))\n\n\n@dataclass\nclass Artifact:\n    PARAM_PATH: ClassVar[str] = \"path\"\n    PARAM_DESC: ClassVar[str] = \"desc\"\n    PARAM_TYPE: ClassVar[str] = \"type\"\n    PARAM_LABELS: ClassVar[str] = \"labels\"\n    PARAM_META: ClassVar[str] = \"meta\"\n\n    path: str\n    desc: Optional[str] = None\n    type: Optional[str] = None\n    labels: list[str] = field(default_factory=list)\n    meta: dict[str, Any] = field(default_factory=dict)\n\n    def to_dict(self) -> dict[str, str]:\n        return compact(asdict(self))\n\n\nANNOTATION_FIELDS = [field.name for field in fields(Annotation)]\nANNOTATION_SCHEMA = {\n    Annotation.PARAM_DESC: str,\n    Annotation.PARAM_TYPE: str,\n    Annotation.PARAM_LABELS: [str],\n    Annotation.PARAM_META: object,\n}\nARTIFACT_SCHEMA: dict[Any, Any] = {\n    Required(Artifact.PARAM_PATH): str,\n    **ANNOTATION_SCHEMA,  # type: ignore[arg-type]\n}\n"
  },
  {
    "path": "dvc/api/__init__.py",
    "content": "from dvc.fs.dvc import _DVCFileSystem as DVCFileSystem\n\nfrom .artifacts import artifacts_show\nfrom .data import get_url, open, read  # noqa: A004\nfrom .dataset import get as get_dataset\nfrom .experiments import exp_save, exp_show\nfrom .scm import all_branches, all_commits, all_tags\nfrom .show import metrics_show, params_show\n\n__all__ = [\n    \"DVCFileSystem\",\n    \"all_branches\",\n    \"all_commits\",\n    \"all_tags\",\n    \"artifacts_show\",\n    \"exp_save\",\n    \"exp_show\",\n    \"get_dataset\",\n    \"get_url\",\n    \"metrics_show\",\n    \"open\",\n    \"params_show\",\n    \"read\",\n]\n"
  },
  {
    "path": "dvc/api/artifacts.py",
    "content": "import os\nfrom typing import Any, Optional\n\nfrom dvc.repo import Repo\n\n\ndef artifacts_show(\n    name: str,\n    version: Optional[str] = None,\n    stage: Optional[str] = None,\n    repo: Optional[str] = None,\n) -> dict[str, str]:\n    \"\"\"\n    Return path and Git revision for an artifact in a DVC project.\n\n    The resulting path and revision can be used in conjunction with other dvc.api\n    calls to open and read the artifact.\n\n    Args:\n        name (str): name of the artifact to open.\n        version (str, optional): version of the artifact to open. Defaults to\n            the latest version.\n        stage (str, optional): name of the model registry stage.\n        repo: (str, optional): path or URL for the DVC repo.\n\n    Returns:\n        Dictionary of the form:\n            {\n                \"rev\": ...,\n                \"path\": ...,\n            }\n\n    Raises:\n        dvc.exceptions.ArtifactNotFoundError: The specified artifact was not found in\n            the repo.\n    \"\"\"\n    if version and stage:\n        raise ValueError(\"Artifact version and stage are mutually exclusive.\")\n\n    from dvc.repo.artifacts import Artifacts\n    from dvc.utils import as_posix\n\n    repo_kwargs: dict[str, Any] = {\n        \"subrepos\": True,\n        \"uninitialized\": True,\n    }\n\n    dirname, _ = Artifacts.parse_path(name)\n    with Repo.open(repo, **repo_kwargs) as _repo:\n        rev = _repo.artifacts.get_rev(name, version=version, stage=stage)\n        with _repo.switch(rev):\n            root = _repo.fs.root_marker\n            _dirname = _repo.fs.join(root, dirname) if dirname else root\n            with Repo(_dirname, fs=_repo.fs, scm=_repo.scm) as r:\n                path = r.artifacts.get_path(name)\n                path = _repo.fs.join(_repo.fs.root_marker, as_posix(path))\n                parts = _repo.fs.relparts(path, _repo.root_dir)\n                return {\"rev\": rev, \"path\": os.path.join(*parts)}\n"
  },
  {
    "path": "dvc/api/data.py",
    "content": "from contextlib import _GeneratorContextManager as GCM\nfrom contextlib import contextmanager\nfrom typing import Any, Optional\n\nfrom funcy import reraise\n\nfrom dvc.exceptions import FileMissingError, OutputNotFoundError, PathMissingError\nfrom dvc.repo import Repo\n\n\n@contextmanager\ndef _wrap_exceptions(repo, url):\n    from dvc.config import NoRemoteError\n    from dvc.exceptions import NoOutputInExternalRepoError, NoRemoteInExternalRepoError\n\n    try:\n        yield\n    except NoRemoteError as exc:\n        raise NoRemoteInExternalRepoError(url) from exc\n    except OutputNotFoundError as exc:\n        if exc.repo is repo:\n            raise NoOutputInExternalRepoError(exc.output, repo.root_dir, url) from exc\n        raise\n    except FileMissingError as exc:\n        raise PathMissingError(exc.path, url) from exc\n\n\ndef get_url(\n    path: str,\n    repo: Optional[str] = None,\n    rev: Optional[str] = None,\n    remote: Optional[str] = None,\n    config: Optional[dict[str, Any]] = None,\n    remote_config: Optional[dict[str, Any]] = None,\n):\n    \"\"\"\n    Returns the URL to the storage location of a data file or directory tracked\n    in a DVC repo. For Git repos, HEAD is used unless a rev argument is\n    supplied. The default remote is tried unless a remote argument is supplied.\n\n    Raises OutputNotFoundError if the file is not tracked by DVC.\n\n    NOTE: This function does not check for the actual existence of the file or\n    directory in the remote storage.\n\n    Args:\n        path (str): location and file name of the target, relative to the root\n            of `repo`.\n        repo (str, optional): location of the DVC project or Git Repo.\n            Defaults to the current DVC project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. 
[user@]server:project.git).\n        rev (str, optional): Any `Git revision`_ such as a branch or tag name,\n            a commit hash or a dvc experiment name.\n            Defaults to HEAD.\n            If `repo` is not a Git repo, this option is ignored.\n        remote (str, optional): Name of the `DVC remote`_ used to form the\n            returned URL string.\n            Defaults to the `default remote`_ of `repo`.\n            For local projects, the cache is tried before the default remote.\n        config(dict, optional): config to be passed to the DVC repository.\n            Defaults to None.\n        remote_config(dict, optional): remote config to be passed to the DVC\n            repository.\n            Defaults to None.\n\n    Returns:\n        str: URL to the file or directory.\n    \"\"\"\n    from dvc.config import NoRemoteError\n    from dvc_data.index import StorageKeyError\n\n    with Repo.open(\n        repo,\n        rev=rev,\n        subrepos=True,\n        uninitialized=True,\n        remote=remote,\n        config=config,\n        remote_config=remote_config,\n    ) as _repo:\n        index, entry = _repo.get_data_index_entry(path)\n        with reraise(\n            (StorageKeyError, ValueError),\n            NoRemoteError(f\"no remote specified in {_repo}\"),\n        ):\n            remote_fs, remote_path = index.storage_map.get_remote(entry)\n            return remote_fs.unstrip_protocol(remote_path)\n\n\nclass _OpenContextManager(GCM):\n    def __init__(self, func, args, kwds):\n        self.gen = func(*args, **kwds)\n        self.func, self.args, self.kwds = (func, args, kwds)  # type: ignore[assignment]\n\n    def __getattr__(self, name):\n        raise AttributeError(\"dvc.api.open() should be used in a with statement.\")\n\n\ndef open(  # noqa: A001\n    path: str,\n    repo: Optional[str] = None,\n    rev: Optional[str] = None,\n    remote: Optional[str] = None,\n    mode: str = \"r\",\n    encoding: Optional[str] = None,\n    
config: Optional[dict[str, Any]] = None,\n    remote_config: Optional[dict[str, Any]] = None,\n):\n    \"\"\"\n    Opens a file tracked in a DVC project.\n\n    This function may only be used as a context manager (using the `with`\n    keyword, as shown in the examples).\n\n    This function makes a direct connection to the remote storage, so the file\n    contents can be streamed. Your code can process the data buffer as it's\n    streamed, which optimizes memory usage.\n\n    Note:\n        Use dvc.api.read() to load the complete file contents\n        in a single function call, no context manager involved.\n        Neither function utilizes disc space.\n\n    Args:\n        path (str): location and file name of the target to open,\n        relative to the root of `repo`.\n        repo (str, optional): location of the DVC project or Git Repo.\n            Defaults to the current DVC project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. 
[user@]server:project.git).\n        rev (str, optional): Any `Git revision`_ such as a branch or tag name,\n            a commit hash or a dvc experiment name.\n            Defaults to HEAD.\n            If `repo` is not a Git repo, this option is ignored.\n        remote (str, optional): Name of the `DVC remote`_ used to form the\n            returned URL string.\n            Defaults to the `default remote`_ of `repo`.\n            For local projects, the cache is tried before the default remote.\n        mode (str, optional): Specifies the mode in which the file is opened.\n            Defaults to \"r\" (read).\n            Mirrors the namesake parameter in builtin `open()`_.\n            Only reading `mode` is supported.\n        encoding(str, optional): `Codec`_ used to decode the file contents.\n            Defaults to None.\n            This should only be used in text mode.\n            Mirrors the namesake parameter in builtin `open()`_.\n        config(dict, optional): config to be passed to the DVC repository.\n            Defaults to None.\n        remote_config(dict, optional): remote config to be passed to the DVC\n            repository.\n            Defaults to None.\n\n    Returns:\n        _OpenContextManager: A context manager that generates a corresponding\n            `file object`_.\n            The exact type of file object depends on the mode used.\n            For more details, please refer to Python's `open()`_ built-in,\n            which is used under the hood.\n\n    Raises:\n        AttributeError: If this method is not used as a context manager.\n        ValueError: If non-read `mode` is used.\n\n    Examples:\n\n        - Use data or models from a DVC repository.\n\n        Any file tracked in a DVC project (and stored remotely) can be\n        processed directly in your Python code with this API.\n        For example, an XML file tracked in a public DVC repo on GitHub can be\n        processed like this:\n\n        >>> from xml.sax 
import parse\n        >>> import dvc.api\n        >>> from mymodule import mySAXHandler\n\n        >>> with dvc.api.open(\n        ...     'get-started/data.xml',\n        ...     repo='https://github.com/iterative/dataset-registry'\n        ... ) as fd:\n        ...     parse(fd, mySAXHandler)\n\n        We use a SAX XML parser here because dvc.api.open() is able to stream\n        the data from remote storage.\n        The mySAXHandler object should handle the event-driven parsing of the\n        document in this case.\n        This increases the performance of the code (minimizing memory usage),\n        and is typically faster than loading the whole data into memory.\n\n        - Accessing private repos\n\n        This is just a matter of using the right repo argument, for example an\n        SSH URL (requires that the credentials are configured locally):\n\n        >>> import dvc.api\n\n        >>> with dvc.api.open(\n        ...     'features.dat',\n        ...     repo='git@server.com:path/to/repo.git'\n        ... ) as fd:\n        ...     # ... Process 'features'\n        ...     pass\n\n        - Use different versions of data\n\n        Any git revision (see `rev`) can be accessed programmatically.\n        For example, if your DVC repo has tagged releases of a CSV dataset:\n\n        >>> import csv\n        >>> import dvc.api\n        >>> with dvc.api.open(\n        ...     'clean.csv',\n        ...     rev='v1.1.0'\n        ... ) as fd:\n        ...     reader = csv.reader(fd)\n        ...     # ... Process 'clean' data from version 1.1.0\n\n    .. _Git revision:\n        https://git-scm.com/docs/revisions\n\n    .. _DVC remote:\n        https://dvc.org/doc/command-reference/remote\n\n    .. _default remote:\n        https://dvc.org/doc/command-reference/remote/default\n\n    .. _open():\n        https://docs.python.org/3/library/functions.html#open\n\n    .. _Codec:\n        https://docs.python.org/3/library/codecs.html#standard-encodings\n\n    .. 
_file object:\n        https://docs.python.org/3/glossary.html#term-file-object\n\n    \"\"\"\n    if \"r\" not in mode:\n        raise ValueError(\"Only reading `mode` is supported.\")\n\n    args = (path,)\n    kwargs = {\n        \"repo\": repo,\n        \"remote\": remote,\n        \"rev\": rev,\n        \"mode\": mode,\n        \"encoding\": encoding,\n        \"config\": config,\n        \"remote_config\": remote_config,\n    }\n    return _OpenContextManager(_open, args, kwargs)\n\n\ndef _open(\n    path,\n    repo=None,\n    rev=None,\n    remote=None,\n    mode=\"r\",\n    encoding=None,\n    config=None,\n    remote_config=None,\n):\n    repo_kwargs: dict[str, Any] = {\n        \"subrepos\": True,\n        \"uninitialized\": True,\n        \"remote\": remote,\n        \"config\": config,\n        \"remote_config\": remote_config,\n    }\n\n    with Repo.open(repo, rev=rev, **repo_kwargs) as _repo:\n        with _wrap_exceptions(_repo, path):\n            import os\n            from typing import TYPE_CHECKING, Union\n\n            from dvc.exceptions import IsADirectoryError as DvcIsADirectoryError\n            from dvc.fs.data import DataFileSystem\n            from dvc.fs.dvc import DVCFileSystem\n\n            if TYPE_CHECKING:\n                from dvc.fs import FileSystem\n\n            fs: Union[FileSystem, DataFileSystem, DVCFileSystem]\n            if os.path.isabs(path):\n                fs = DataFileSystem(index=_repo.index.data[\"local\"])\n                fs_path = path\n            else:\n                fs = DVCFileSystem(repo=_repo, subrepos=True)\n                fs_path = fs.from_os_path(path)\n\n            try:\n                with fs.open(fs_path, mode=mode, encoding=encoding) as fobj:\n                    yield fobj\n            except FileNotFoundError as exc:\n                raise FileMissingError(path) from exc\n            except IsADirectoryError as exc:\n                raise DvcIsADirectoryError(f\"'{path}' is a directory\") 
from exc\n\n\ndef read(\n    path,\n    repo=None,\n    rev=None,\n    remote=None,\n    mode=\"r\",\n    encoding=None,\n    config=None,\n    remote_config=None,\n):\n    \"\"\"\n    Returns the contents of a tracked file (by DVC or Git). For Git repos, HEAD\n    is used unless a rev argument is supplied. The default remote is tried\n    unless a remote argument is supplied.\n    \"\"\"\n    with open(\n        path,\n        repo=repo,\n        rev=rev,\n        remote=remote,\n        mode=mode,\n        encoding=encoding,\n        config=config,\n        remote_config=remote_config,\n    ) as fd:\n        return fd.read()\n"
  },
  {
    "path": "dvc/api/dataset.py",
    "content": "from typing import Literal, TypedDict, Union\n\n\nclass DatachainDataset(TypedDict):\n    type: Literal[\"dc\"]\n    name: str\n    version: int\n\n\nclass DVCDataset(TypedDict):\n    type: Literal[\"dvc\"]\n    url: str\n    path: str\n    sha: str\n\n\nclass URLDataset(TypedDict):\n    type: Literal[\"url\"]\n    files: list[str]\n    path: str\n\n\ndef get(name: str) -> Union[DatachainDataset, DVCDataset, URLDataset]:\n    from difflib import get_close_matches\n\n    from dvc.fs import get_cloud_fs\n    from dvc.repo import Repo, datasets\n\n    repo = Repo()\n    try:\n        dataset = repo.datasets[name]\n    except datasets.DatasetNotFoundError as e:\n        add_note = getattr(e, \"add_note\", lambda _: None)\n        if matches := get_close_matches(name, repo.datasets):\n            add_note(f\"Did you mean: {matches[0]!r}?\")\n        raise\n\n    if dataset._invalidated:\n        raise ValueError(f\"dataset not in sync. Sync with 'dvc ds update {name}'.\")\n    if not dataset.lock:\n        raise ValueError(\"missing lock information\")\n    if dataset.type == \"dvc\":\n        return DVCDataset(\n            type=\"dvc\",\n            url=dataset.lock.url,\n            path=dataset.lock.path,\n            sha=dataset.lock.rev_lock,\n        )\n    if dataset.type == \"dc\":\n        return DatachainDataset(\n            type=\"dc\", name=dataset.name_version[0], version=dataset.lock.version\n        )\n    if dataset.type == \"url\":\n        fs_cls, _, path = get_cloud_fs(repo.config, url=dataset.lock.url)\n        assert fs_cls\n        join_version = getattr(fs_cls, \"join_version\", lambda path, _: path)\n        protocol = fs_cls.protocol\n        versioned_path = join_version(path, dataset.lock.meta.version_id)\n        versioned_path = f\"{protocol}://{versioned_path}\"\n        files = [\n            join_version(\n                fs_cls.join(versioned_path, file.relpath), file.meta.version_id\n            )\n            for file 
in dataset.lock.files\n        ]\n        return URLDataset(type=\"url\", files=files, path=versioned_path)\n    raise AssertionError(\"unreachable\")\n"
  },
  {
    "path": "dvc/api/experiments.py",
    "content": "from typing import Optional, Union\n\nfrom rich.text import Text\n\nfrom dvc.repo import Repo\nfrom dvc.repo.experiments.show import tabulate\n\n\ndef exp_save(\n    name: Optional[str] = None,\n    force: bool = False,\n    include_untracked: Optional[list[str]] = None,\n):\n    \"\"\"\n    Create a new DVC experiment using `exp save`.\n\n    See https://dvc.org/doc/command-reference/exp/save.\n\n    Args:\n        name (str, optional): specify a name for this experiment.\n            If `None`, a default one will be generated, such as `urban-sign`.\n            Defaults to `None`.\n        force (bool):  overwrite the experiment if an experiment with the same\n            name already exists.\n            Defaults to `False`.\n        include_untracked (List[str], optional): specify untracked file(s) to\n            be included in the saved experiment.\n            Defaults to `None`.\n\n    Returns:\n        str: The `Git revision`_ of the created experiment.\n\n    Raises:\n        ExperimentExistsError: If an experiment with `name` already exists and\n            `force=False`.\n\n    .. 
_Git revision:\n        https://git-scm.com/docs/revisions\n    \"\"\"\n    with Repo() as repo:\n        return repo.experiments.save(\n            name=name, force=force, include_untracked=include_untracked\n        )\n\n\ndef _postprocess(exp_rows):\n    for exp_row in exp_rows:\n        for k, v in exp_row.items():\n            if isinstance(v, Text):\n                v_str = str(v)\n                try:\n                    exp_row[k] = float(v_str)\n                except ValueError:\n                    exp_row[k] = v_str\n\n            elif not exp_row[k]:\n                exp_row[k] = None\n\n    return exp_rows\n\n\ndef exp_show(\n    repo: Optional[str] = None,\n    revs: Optional[Union[str, list[str]]] = None,\n    num: int = 1,\n    param_deps: bool = False,\n    force: bool = False,\n    config: Optional[dict] = None,\n) -> list[dict]:\n    \"\"\"Get DVC experiments tracked in `repo`.\n\n    Without arguments, this function will retrieve all experiments derived from\n    the Git `HEAD`.\n\n    See the options below to customize the experiments retrieved.\n\n    Args:\n        repo (str, optional): location of the DVC repository.\n            Defaults to the current project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. [user@]server:project.git).\n        revs (Union[str, List[str]], optional): Git revision(s) (e.g. 
branch,\n            tag, SHA commit) to use as a reference point to start listing\n            experiments.\n            Defaults to `None`, which will use `HEAD` as starting point.\n        num (int, optional): show experiments from the last `num` commits\n            (first parents) starting from the `revs` baseline.\n            Give a negative value to include all first-parent commits (similar\n            to `git log -n`).\n            Defaults to 1.\n        param_deps (bool, optional): include only parameters that are stage\n            dependencies.\n            Defaults to `False`.\n        force (bool, optional): force re-collection of experiments instead of\n            loading from internal experiments cache.\n            DVC caches `exp_show` data for completed experiments to improve\n            performance of subsequent calls.\n            When `force` is specified, DVC will reload all experiment data and\n            ignore any previously cached results.\n            Defaults to `False`.\n        config (dict, optional): config to be passed through to DVC project.\n            Defaults to `None`.\n\n    Returns:\n        List[Dict]: Each item in the list will contain a dictionary with\n            the info for an individual experiment.\n            See Examples below.\n    \"\"\"\n    with Repo.open(repo, config=config) as _repo:\n        experiments = _repo.experiments.show(\n            revs=revs,\n            num=num,\n            param_deps=param_deps,\n            force=force,\n        )\n        td, _ = tabulate(experiments, fill_value=None)\n\n        return _postprocess(td.as_dict())\n"
  },
  {
    "path": "dvc/api/scm.py",
    "content": "from typing import Optional\n\nfrom dvc.repo import Repo\n\n\ndef all_branches(repo: Optional[str] = None) -> list[str]:\n    \"\"\"Get all Git branches in a DVC repository.\n\n    Args:\n        repo (str, optional): location of the DVC repository.\n            Defaults to the current project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. [user@]server:project.git).\n    Returns:\n        List[str]: Names of the Git branches.\n    \"\"\"\n    with Repo.open(repo) as _repo:\n        return _repo.scm.list_branches()\n\n\ndef all_commits(repo: Optional[str] = None) -> list[str]:\n    \"\"\"Get all Git commits in a DVC repository.\n\n    Args:\n        repo (str, optional): location of the DVC repository.\n            Defaults to the current project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. [user@]server:project.git).\n    Returns:\n        List[str]: SHAs of the Git commits.\n    \"\"\"\n    with Repo.open(repo) as _repo:\n        return _repo.scm.list_all_commits()\n\n\ndef all_tags(repo: Optional[str] = None) -> list[str]:\n    \"\"\"Get all Git tags in a DVC repository.\n\n    Args:\n        repo (str, optional): location of the DVC repository.\n            Defaults to the current project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. [user@]server:project.git).\n    Returns:\n        List[str]: Names of the Git tags.\n    \"\"\"\n    with Repo.open(repo) as _repo:\n        return _repo.scm.list_tags()\n"
  },
  {
    "path": "dvc/api/show.py",
    "content": "import typing\nfrom collections import Counter\nfrom collections.abc import Iterable\nfrom typing import Optional, Union\n\nfrom funcy import first\n\nfrom dvc.repo import Repo\n\n\ndef _postprocess(results):\n    processed: dict[str, dict] = {}\n    for rev, rev_data in results.items():\n        if not rev_data:\n            continue\n\n        processed[rev] = {}\n\n        counts: typing.Counter[str] = Counter()\n        for file_data in rev_data[\"data\"].values():\n            for k in file_data[\"data\"]:\n                counts[k] += 1\n        for file_name, file_data in rev_data[\"data\"].items():\n            to_merge = {\n                (k if counts[k] == 1 else f\"{file_name}:{k}\"): v\n                for k, v in file_data[\"data\"].items()\n            }\n            processed[rev] = processed[rev] | to_merge\n\n    processed.pop(\"workspace\", None)\n\n    return processed\n\n\ndef metrics_show(\n    *targets: str,\n    repo: Optional[str] = None,\n    rev: Optional[str] = None,\n    config: Optional[dict] = None,\n) -> dict:\n    \"\"\"Get metrics tracked in `repo`.\n\n    Without arguments, this function will retrieve all metrics from all tracked\n    metric files, for the current working tree.\n\n    See the options below to restrict the metrics retrieved.\n\n    Args:\n        *targets (str, optional): Names of the metric files to retrieve\n        metrics from. 
For example, \"classifier_eval.json,\n        clustering_eval.json\".\n        If no `targets` are provided, all metric files tracked in `dvc.yaml`\n        will be used.\n        Note that targets don't necessarily have to be defined in `dvc.yaml`.\n        repo (str, optional): Location of the DVC repository.\n            Defaults to the current project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. [user@]server:project.git).\n        rev (str, optional): Name of the `Git revision`_ to retrieve metrics\n            from.\n            Defaults to `None`.\n            An example of git revision can be a branch or tag name, a commit\n            hash or a dvc experiment name.\n            If `repo` is not a Git repo, this option is ignored.\n            If `None`, the current working tree will be used.\n        config (dict, optional): config to be passed through to DVC project.\n            Defaults to `None`.\n\n    Returns:\n        Dict: See Examples below.\n\n    Examples:\n\n        - No arguments.\n\n        Working on https://github.com/iterative/example-get-started\n\n        >>> import dvc.api\n        >>> metrics = dvc.api.metrics_show()\n        >>> print(json.dumps(metrics, indent=4))\n        {\n            \"avg_prec\": 0.9249974999612706,\n            \"roc_auc\": 0.9460213440787918\n        }\n\n        ---\n\n        - Using `rev`.\n\n        Working on https://github.com/iterative/example-get-started\n\n        >>> import json\n        >>> import dvc.api\n        >>> metrics = dvc.api.metrics_show(rev=\"tune-hyperparams\")\n        >>> print(json.dumps(metrics, indent=4))\n        {\n            \"avg_prec\": 0.9268792615819422,\n            \"roc_auc\": 0.945093365854111\n        }\n\n        ---\n\n        - Using `targets`.\n\n        Working on 
https://github.com/iterative/example-get-started\n\n        >>> import json\n        >>> import dvc.api\n        >>> metrics = dvc.api.metrics_show(\"evaluation.json\")\n        >>> print(json.dumps(metrics, indent=4))\n        {\n            \"avg_prec\": 0.9249974999612706,\n            \"roc_auc\": 0.9460213440787918\n        }\n\n        ---\n\n        - Git URL as `repo`.\n\n        >>> import json\n        >>> import dvc.api\n        >>> metrics = dvc.api.metrics_show(\n        ...     repo=\"https://github.com/iterative/demo-fashion-mnist\")\n        >>> print(json.dumps(metrics, indent=4))\n        {\n            \"loss\": 0.25284987688064575,\n            \"accuracy\": 0.9071000218391418\n        }\n\n\n    .. _Git revision:\n        https://git-scm.com/docs/revisions\n    \"\"\"\n    from dvc.repo.metrics.show import to_relpath\n\n    with Repo.open(repo, config=config) as _repo:\n        metrics = _repo.metrics.show(\n            targets=targets,\n            revs=rev if rev is None else [rev],\n            on_error=\"raise\",\n        )\n        metrics = {\n            k: to_relpath(_repo.fs, _repo.root_dir, v) for k, v in metrics.items()\n        }\n\n    metrics = _postprocess(metrics)\n\n    if not metrics:\n        return {}\n\n    return metrics[first(metrics)]\n\n\ndef params_show(\n    *targets: str,\n    repo: Optional[str] = None,\n    stages: Optional[Union[str, Iterable[str]]] = None,\n    rev: Optional[str] = None,\n    deps: bool = False,\n    config: Optional[dict] = None,\n) -> dict:\n    \"\"\"Get parameters tracked in `repo`.\n\n    Without arguments, this function will retrieve all params from all tracked\n    parameter files, for the current working tree.\n\n    See the options below to restrict the parameters retrieved.\n\n    Args:\n        *targets (str, optional): Names of the parameter files to retrieve\n        params from. 
For example, \"params.py, myparams.toml\".\n        If no `targets` are provided, all parameter files tracked in `dvc.yaml`\n        will be used.\n        Note that targets don't necessarily have to be defined in `dvc.yaml`.\n        repo (str, optional): location of the DVC repository.\n            Defaults to the current project (found by walking up from the\n            current working directory tree).\n            It can be a URL or a file system path.\n            Both HTTP and SSH protocols are supported for online Git repos\n            (e.g. [user@]server:project.git).\n        stages (Union[str, Iterable[str]], optional): Name or names of the\n            stages to retrieve parameters from.\n            Defaults to `None`.\n            If `None`, all parameters from all stages will be retrieved.\n            If this method is called from a different location to the one where\n            the `dvc.yaml` is found, the relative path to the `dvc.yaml` must\n            be provided as a prefix with the syntax `{relpath}:{stage}`.\n            For example: `subdir/dvc.yaml:stage-0` or `../dvc.yaml:stage-1`.\n        rev (str, optional): Name of the `Git revision`_ to retrieve parameters\n            from.\n            Defaults to `None`.\n            An example of git revision can be a branch or tag name, a commit\n            hash or a dvc experiment name.\n            If `repo` is not a Git repo, this option is ignored.\n            If `None`, the current working tree will be used.\n        deps (bool, optional): Whether to retrieve only parameters that are\n            stage dependencies or not.\n            Defaults to `False`.\n        config (dict, optional): config to be passed through to DVC project.\n            Defaults to `None`.\n\n    Returns:\n        Dict: See Examples below.\n\n    Examples:\n\n        - No arguments.\n\n        Working on https://github.com/iterative/example-get-started\n\n        >>> import json\n        >>> import dvc.api\n   
     >>> params = dvc.api.params_show()\n        >>> print(json.dumps(params, indent=4))\n        {\n            \"prepare\": {\n                \"split\": 0.2,\n                \"seed\": 20170428\n            },\n            \"featurize\": {\n                \"max_features\": 200,\n                \"ngrams\": 2\n            },\n            \"train\": {\n                \"seed\": 20170428,\n                \"n_est\": 50,\n                \"min_split\": 0.01\n            }\n        }\n\n        ---\n\n        - Filtering with `stages`.\n\n        Working on https://github.com/iterative/example-get-started\n\n        `stages` can be a single string:\n\n        >>> import json\n        >>> import dvc.api\n        >>> params = dvc.api.params_show(stages=\"prepare\")\n        >>> print(json.dumps(params, indent=4))\n        {\n            \"prepare\": {\n                \"split\": 0.2,\n                \"seed\": 20170428\n            }\n        }\n\n        Or an iterable of strings:\n\n        >>> import json\n        >>> import dvc.api\n        >>> params = dvc.api.params_show(stages=[\"prepare\", \"train\"])\n        >>> print(json.dumps(params, indent=4))\n        {\n            \"prepare\": {\n                \"split\": 0.2,\n                \"seed\": 20170428\n            },\n            \"train\": {\n                \"seed\": 20170428,\n                \"n_est\": 50,\n                \"min_split\": 0.01\n            }\n        }\n\n        ---\n\n        - Using `rev`.\n\n        Working on https://github.com/iterative/example-get-started\n\n        >>> import json\n        >>> import dvc.api\n        >>> params = dvc.api.params_show(rev=\"tune-hyperparams\")\n        >>> print(json.dumps(params, indent=4))\n        {\n            \"prepare\": {\n                \"split\": 0.2,\n                \"seed\": 20170428\n            },\n            \"featurize\": {\n                \"max_features\": 200,\n                \"ngrams\": 2\n            },\n            
\"train\": {\n                \"seed\": 20170428,\n                \"n_est\": 100,\n                \"min_split\": 8\n            }\n        }\n\n        ---\n\n        - Using `targets`.\n\n        Working on the `multi-params-files` folder of\n        https://github.com/iterative/pipeline-conifguration\n\n        You can pass a single target:\n\n        >>> import json\n        >>> import dvc.api\n        >>> params = dvc.api.params_show(\"params.yaml\")\n        >>> print(json.dumps(params, indent=4))\n        {\n            \"run_mode\": \"prod\",\n            \"configs\": {\n                \"dev\": \"configs/params_dev.yaml\",\n                \"test\": \"configs/params_test.yaml\",\n                \"prod\": \"configs/params_prod.yaml\"\n            },\n            \"evaluate\": {\n                \"dataset\": \"micro\",\n                \"size\": 5000,\n                \"metrics\": [\"f1\", \"roc-auc\"],\n                \"metrics_file\": \"reports/metrics.json\",\n                \"plots_cm\": \"reports/plot_confusion_matrix.png\"\n            }\n        }\n\n\n        Or multiple targets:\n\n        >>> import json\n        >>> import dvc.api\n        >>> params = dvc.api.params_show(\n        ...     
\"configs/params_dev.yaml\", \"configs/params_prod.yaml\")\n        >>> print(json.dumps(params, indent=4))\n        {\n            \"configs/params_prod.yaml:run_mode\": \"prod\",\n            \"configs/params_prod.yaml:config_file\": \"configs/params_prod.yaml\",\n            \"configs/params_prod.yaml:data_load\": {\n                \"dataset\": \"large\",\n                \"sampling\": {\n                    \"enable\": true,\n                    \"size\": 50000\n                }\n            },\n            \"configs/params_prod.yaml:train\": {\n                \"epochs\": 1000\n            },\n            \"configs/params_dev.yaml:run_mode\": \"dev\",\n            \"configs/params_dev.yaml:config_file\": \"configs/params_dev.yaml\",\n            \"configs/params_dev.yaml:data_load\": {\n                \"dataset\": \"development\",\n                \"sampling\": {\n                    \"enable\": true,\n                    \"size\": 1000\n                }\n            },\n            \"configs/params_dev.yaml:train\": {\n                \"epochs\": 10\n            }\n        }\n\n        ---\n\n        - Git URL as `repo`.\n\n        >>> import json\n        >>> import dvc.api\n        >>> params = dvc.api.params_show(\n        ...     repo=\"https://github.com/iterative/demo-fashion-mnist\")\n        >>> print(json.dumps(params, indent=4))\n        {\n            \"train\": {\n                \"batch_size\": 128,\n                \"hidden_units\": 64,\n                \"dropout\": 0.4,\n                \"num_epochs\": 10,\n                \"lr\": 0.001,\n                \"conv_activation\": \"relu\"\n            }\n        }\n\n\n    .. 
_Git revision:\n        https://git-scm.com/docs/revisions\n\n    \"\"\"\n    from dvc.repo.metrics.show import to_relpath\n\n    if isinstance(stages, str):\n        stages = [stages]\n\n    with Repo.open(repo, config=config) as _repo:\n        params = _repo.params.show(\n            revs=rev if rev is None else [rev],\n            targets=targets,\n            deps_only=deps,\n            on_error=\"raise\",\n            stages=stages,\n        )\n        params = {k: to_relpath(_repo.fs, _repo.root_dir, v) for k, v in params.items()}\n\n    params = _postprocess(params)\n\n    if not params:\n        return {}\n\n    return params[first(params)]\n"
  },
  {
    "path": "dvc/build.py",
    "content": "from typing import Optional\n\ntry:\n    # file is created during dvc build\n    from . import _build  # type: ignore[attr-defined, import]\n\n    PKG: Optional[str] = _build.PKG  # type: ignore[assignment]\nexcept ImportError:\n    PKG = None  # type: ignore[assignment]\n"
  },
  {
    "path": "dvc/cachemgr.py",
    "content": "import os\nfrom typing import TYPE_CHECKING, Optional\n\nfrom dvc.fs import GitFileSystem, Schemes\nfrom dvc_data.hashfile.db import get_odb\nfrom dvc_data.hashfile.hash import DEFAULT_ALGORITHM\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\nLEGACY_HASH_NAMES = {\"md5-dos2unix\", \"params\"}\n\n\ndef _get_odb(\n    repo,\n    settings,\n    fs=None,\n    prefix: Optional[tuple[str, ...]] = None,\n    hash_name: Optional[str] = None,\n    **kwargs,\n):\n    from dvc.fs import get_cloud_fs\n\n    if not settings:\n        return None\n\n    cls, config, fs_path = get_cloud_fs(repo.config, **settings)\n    fs = fs or cls(**config)\n    if prefix:\n        fs_path = fs.join(fs_path, *prefix)\n    if hash_name:\n        config[\"hash_name\"] = hash_name\n    return get_odb(fs, fs_path, state=repo.state, **config)\n\n\nclass CacheManager:\n    CACHE_DIR = \"cache\"\n    FILES_DIR = \"files\"\n    FS_DIR = \"fs\"\n\n    def __init__(self, repo):\n        self._repo = repo\n        self.config = config = repo.config[\"cache\"]\n        self._odb = {}\n\n        local = config.get(\"local\")\n        default = self.default_local_cache_dir\n\n        if local:\n            settings = {\"name\": local}\n        elif \"dir\" not in config and not default:\n            settings = None\n        else:\n            from dvc.config_schema import LOCAL_COMMON\n\n            url = config.get(\"dir\") or default\n            settings = {\"url\": url}\n            for opt in LOCAL_COMMON:\n                if opt in config:\n                    settings[str(opt)] = config.get(opt)\n\n        kwargs = {}\n        if not isinstance(repo.fs, GitFileSystem):\n            kwargs[\"fs\"] = repo.fs\n\n        odb = _get_odb(\n            repo,\n            settings,\n            prefix=(self.FILES_DIR, DEFAULT_ALGORITHM),\n            **kwargs,\n        )\n        self._odb[\"repo\"] = odb\n        self._odb[Schemes.LOCAL] = odb\n        legacy_odb = _get_odb(repo, 
settings, hash_name=\"md5-dos2unix\", **kwargs)\n        self._odb[\"legacy\"] = legacy_odb\n\n    @property\n    def fs_cache(self):\n        \"\"\"Filesystem-based cache.\n\n        Currently used as a temporary location to download files that we don't\n        yet have a regular oid (e.g. md5) for.\n        \"\"\"\n        from dvc_data.index import FileStorage\n\n        return FileStorage(\n            key=(),\n            fs=self.local.fs,\n            path=self.local.fs.join(self.local_cache_dir, self.FS_DIR),\n        )\n\n    def _init_odb(self, schemes):\n        for scheme in schemes:\n            remote = self.config.get(scheme)\n            settings = {\"name\": remote} if remote else None\n            self._odb[scheme] = _get_odb(\n                self._repo,\n                settings,\n                prefix=(self.FILES_DIR, DEFAULT_ALGORITHM),\n            )\n\n    def __getattr__(self, name):\n        try:\n            return self._odb[name]\n        except KeyError as exc:\n            raise AttributeError from exc\n\n    def by_scheme(self):\n        yield from self._odb.items()\n\n    @property\n    def local_cache_dir(self) -> str:\n        \"\"\"Return base local cache directory without any prefixes.\n\n        (i.e. 
`dvc cache dir`).\n        \"\"\"\n        return self.legacy.path\n\n    @property\n    def default_local_cache_dir(self) -> Optional[str]:\n        repo = self._repo\n        if repo and repo.local_dvc_dir:\n            return os.path.join(repo.local_dvc_dir, self.CACHE_DIR)\n        return None\n\n\ndef migrate_2_to_3(repo: \"Repo\", dry: bool = False):\n    \"\"\"Migrate legacy 2.x objects to 3.x cache.\n\n    Legacy 'md5-dos2unix' objects will be re-hashed with 'md5', added to 3.x cache,\n    and then a link from the legacy 2.x location to the 3.x location will be created.\n    \"\"\"\n    from dvc.fs.callbacks import TqdmCallback\n    from dvc.ui import ui\n    from dvc_data.hashfile.db.migrate import migrate, prepare\n\n    src = repo.cache.legacy\n    dest = repo.cache.local\n    if dry:\n        oids = list(src._list_oids())\n        ui.write(\n            f\"{len(oids)} files will be re-hashed and migrated to the DVC 3.0 cache \"\n            \"location.\"\n        )\n        return\n\n    with TqdmCallback(desc=\"Computing DVC 3.0 hashes\", unit=\"files\") as cb:\n        migration = prepare(src, dest, callback=cb)\n\n    with TqdmCallback(desc=\"Migrating to DVC 3.0 cache\", unit=\"files\") as cb:\n        count = migrate(migration, callback=cb)\n    ui.write(f\"Migrated {count} files to DVC 3.0 cache location.\")\n"
  },
  {
    "path": "dvc/cli/__init__.py",
    "content": "\"\"\"This module provides an entrypoint to the dvc cli and parsing utils.\"\"\"\n\nimport logging\nimport sys\nfrom typing import Optional\n\nfrom dvc.log import logger\n\n# Workaround for CPython bug. See [1] and [2] for more info.\n# [1] https://github.com/aws/aws-cli/blob/1.16.277/awscli/clidriver.py#L55\n# [2] https://bugs.python.org/issue29288\n\"\".encode(\"idna\")\n\n\nlogger = logger.getChild(__name__)\n\n\nclass DvcParserError(Exception):\n    \"\"\"Base class for CLI parser errors.\"\"\"\n\n    def __init__(self):\n        super().__init__(\"parser error\")\n\n\ndef parse_args(argv=None):\n    \"\"\"Parses CLI arguments.\n\n    Args:\n        argv: optional list of arguments to parse. sys.argv is used by default.\n\n    Raises:\n        DvcParserError: raised for argument parsing errors.\n    \"\"\"\n    from .parser import get_main_parser\n\n    parser = get_main_parser()\n    args = parser.parse_args(argv)\n    args.parser = parser\n    return args\n\n\ndef _log_unknown_exceptions() -> None:\n    from dvc.info import get_dvc_info\n    from dvc.ui import ui\n    from dvc.utils import colorize\n\n    logger.exception(\"unexpected error\")\n    if logger.isEnabledFor(logging.DEBUG):\n        logger.debug(\"Version info for developers:\\n%s\", get_dvc_info())\n\n    q = colorize(\"Having any troubles?\", \"yellow\")\n    link = colorize(\"https://dvc.org/support\", \"blue\")\n    footer = f\"\\n{q} Hit us up at {link}, we are always happy to help!\"\n    ui.error_write(footer)\n\n\ndef _log_exceptions(exc: Exception) -> Optional[int]:\n    \"\"\"Try to log some known exceptions, that are not DVCExceptions.\"\"\"\n    from dvc.utils import error_link, format_link\n\n    if isinstance(exc, OSError):\n        import errno\n\n        if exc.errno == errno.EMFILE:\n            logger.exception(\n                (\n                    \"too many open files, please visit \"\n                    \"%s to see how to handle this problem\"\n            
    ),\n                error_link(\"many-files\"),\n                extra={\"tb_only\": True},\n            )\n        else:\n            _log_unknown_exceptions()\n        return None\n\n    from dvc.fs import AuthError, ConfigError, RemoteMissingDepsError\n\n    if isinstance(exc, RemoteMissingDepsError):\n        from dvc import PKG\n\n        proto = exc.protocol\n        by_pkg = {\n            \"pip\": f\"pip install 'dvc[{proto}]'\",\n            \"conda\": f\"conda install -c conda-forge dvc-{proto}\",\n        }\n\n        if PKG in by_pkg:\n            link = format_link(\"https://dvc.org/doc/install\")\n            cmd = by_pkg.get(PKG)\n            hint = (\n                \"To install dvc with those dependencies, run:\\n\"\n                \"\\n\"\n                f\"\\t{cmd}\\n\"\n                \"\\n\"\n                f\"See {link} for more info.\"\n            )\n        else:\n            link = format_link(\"https://github.com/treeverse/dvc/issues\")\n            hint = f\"\\nPlease report this bug to {link}. Thank you!\"\n\n        logger.exception(\n            \"URL '%s' is supported but requires these missing dependencies: %s. 
%s\",\n            exc.url,\n            exc.missing_deps,\n            hint,\n            extra={\"tb_only\": True},\n        )\n        return None\n\n    if isinstance(exc, (AuthError, ConfigError)):\n        link = format_link(\"https://man.dvc.org/remote/modify\")\n        logger.exception(\"configuration error\")\n        logger.exception(\n            \"%s\\nLearn more about configuration settings at %s.\",\n            exc,\n            link,\n            extra={\"tb_only\": True},\n        )\n        return 251\n\n    from dvc_data.hashfile.cache import DiskError\n\n    if isinstance(exc, DiskError):\n        from dvc.utils import relpath\n\n        directory = relpath(exc.directory)\n        logger.exception(\n            (\n                \"Could not open pickled '%s' cache.\\n\"\n                \"Remove the '%s' directory and then retry this command.\"\n                \"\\nSee %s for more information.\"\n            ),\n            exc.type,\n            directory,\n            error_link(\"pickle\"),\n            extra={\"tb_only\": True},\n        )\n        return None\n\n    from dvc_data.hashfile.build import IgnoreInCollectedDirError\n\n    if isinstance(exc, IgnoreInCollectedDirError):\n        logger.exception(\"\")\n        return None\n\n    _log_unknown_exceptions()\n    return None\n\n\ndef main(argv=None):  # noqa: C901, PLR0912, PLR0915\n    \"\"\"Main entry point for dvc CLI.\n\n    Args:\n        argv: optional list of arguments to parse. 
sys.argv is used by default.\n\n    Returns:\n        int: command's return code.\n    \"\"\"\n    from contextlib import ExitStack\n\n    from dvc._debug import debugtools\n    from dvc.config import ConfigError\n    from dvc.exceptions import DvcException, NotDvcRepoError\n    from dvc.logger import set_loggers_level\n\n    # NOTE: stderr/stdout may be closed if we are running from dvc.daemon.\n    # On Linux we directly call cli.main after double forking and closing\n    # the copied parent's standard file descriptors. If we make any logging\n    # calls in this state it will cause an exception due to writing to a closed\n    # file descriptor.\n    if not sys.stderr or sys.stderr.closed:\n        logging.disable()\n    elif not sys.stdout or sys.stdout.closed:\n        logging.disable(logging.INFO)\n\n    args = None\n    stack = ExitStack()\n    try:\n        args = parse_args(argv)\n\n        level = None\n        if args.quiet:\n            level = logging.CRITICAL\n        elif args.verbose == 1:\n            level = logging.DEBUG\n        elif args.verbose > 1:\n            level = logging.TRACE  # type: ignore[attr-defined]\n\n        if level is not None:\n            stack.enter_context(set_loggers_level(level))\n\n        if level and level <= logging.DEBUG:\n            from platform import platform, python_implementation, python_version\n\n            from dvc import PKG, __version__\n\n            pyv = f\"{python_implementation()} {python_version()}\"\n            pkg = f\" ({PKG})\" if PKG else \"\"\n            logger.debug(\"v%s%s, %s on %s\", __version__, pkg, pyv, platform())\n            logger.debug(\"command: %s\", \" \".join(argv or sys.argv))\n\n        logger.trace(args)\n\n        if sys.stdout and not sys.stdout.closed and not args.quiet:\n            from dvc.ui import ui\n\n            ui.enable()\n\n        with debugtools(args):\n            cmd = args.func(args)\n            ret = cmd.do_run()\n    except ConfigError:\n        
logger.exception(\"configuration error\")\n        ret = 251\n    except KeyboardInterrupt:\n        logger.exception(\"interrupted by the user\")\n        ret = 252\n    except BrokenPipeError:\n        import os\n\n        # Python flushes standard streams on exit; redirect remaining output\n        # to devnull to avoid another BrokenPipeError at shutdown\n        # See: https://docs.python.org/3/library/signal.html#note-on-sigpipe\n        devnull = os.open(os.devnull, os.O_WRONLY)\n        os.dup2(devnull, sys.stdout.fileno())\n        ret = 141  # 128 + 13 (SIGPIPE)\n    except NotDvcRepoError:\n        logger.exception(\"\")\n        ret = 253\n    except DvcException:\n        ret = 255\n        logger.exception(\"\")\n    except DvcParserError:\n        ret = 254\n    except Exception as exc:  # noqa: BLE001\n        ret = _log_exceptions(exc) or 255\n\n    try:\n        import os\n\n        from dvc import analytics\n\n        if analytics.is_enabled():\n            analytics.collect_and_send_report(args, ret)\n\n        logger.trace(\"Process %s exiting with %s\", os.getpid(), ret)\n\n        return ret\n    finally:\n        stack.close()\n\n        from dvc.repo.open_repo import clean_repos\n\n        # Remove cached repos at the end of the call; these are anonymous,\n        # so they won't be reused by any subsequent run anyway.\n        clean_repos()\n"
  },
  {
    "path": "dvc/cli/command.py",
    "content": "import os\nfrom abc import ABC, abstractmethod\nfrom typing import TYPE_CHECKING, Any\n\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from dvc.config import Config\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdBase(ABC):\n    UNINITIALIZED = False\n\n    def __init__(self, args: Any):\n        from dvc.repo import Repo\n\n        os.chdir(args.cd)\n\n        self.repo: Repo = Repo(\n            uninitialized=self.UNINITIALIZED,\n            _wait_for_lock=args.wait_for_lock,\n        )\n        self.config: Config = self.repo.config\n        self.args = args\n\n    def do_run(self):\n        with self.repo:\n            return self.run()\n\n    @abstractmethod\n    def run(self):\n        pass\n\n\nclass CmdBaseNoRepo(CmdBase):\n    def __init__(self, args):\n        self.args = args\n\n        os.chdir(args.cd)\n\n    def do_run(self):\n        return self.run()\n"
  },
  {
    "path": "dvc/cli/completion.py",
    "content": "import shtab\n\nBASH_PREAMBLE = \"\"\"\n# $1=COMP_WORDS[1]\n_dvc_compgen_DVCFiles() {\n  compgen -d -S '/' -- $1  # recurse into subdirs\n  compgen -f -X '!*?.dvc' -- $1\n  compgen -f -X '!*Dvcfile' -- $1\n  compgen -f -X '!*dvc.yaml' -- $1\n}\n\n_dvc_compgen_stages() {\n    local _dvc_stages=($(dvc stage list -q --names-only))\n    compgen -W \"${_dvc_stages[*]}\" -- $1\n}\n_dvc_compgen_stages_and_files() {\n    _dvc_compgen_DVCFiles $1\n    _dvc_compgen_stages $1\n}\n\n_dvc_compgen_exps() {\n    local _dvc_exps=($(dvc exp list -q --all-commits --names-only))\n    compgen -W \"${_dvc_exps[*]}\" -- $1\n}\n\n_dvc_compgen_remotes() {\n    local _dvc_remotes=($(dvc remote list | cut -d' ' -f1))\n    compgen -W \"${_dvc_remotes[*]}\" -- $1\n}\n\n_dvc_compgen_config_vars() {\n    compgen -W \"${_dvc_config_vars[*]}\" -- $1\n}\n\"\"\"\n\nZSH_PREAMBLE = \"\"\"\n_dvc_compadd_DVCFiles() {\n    _files -g '(*?.dvc|Dvcfile|dvc.yaml)'\n}\n_dvc_compadd_stages() {\n    # this will also show up the description of the stages\n    _describe 'stages' \"($(_dvc_stages_output))\"\n}\n\n_dvc_stages_output() {\n  dvc stage list -q | awk '{\n    # escape possible `:` on the stage name\n    sub(/:/, \"\\\\\\\\\\\\\\\\:\", $1);\n    # read all of the columns except the first\n    # reading `out` from $2, so as not to have a leading whitespace\n    out=$2; for(i=3;i<=NF;i++){out=out\" \"$i};\n    # print key, \":\" and then single-quote the description\n    # colon is a delimiter used by `_describe` to separate field/description\n    print $1\":\"\"\\\\047\"out\"\\\\047\"\n    # single quote -> \\\\047\n    }'\n}\n\n_dvc_compadd_stages_and_files() {\n    _dvc_compadd_DVCFiles\n    _dvc_compadd_stages\n}\n\n_dvc_compadd_exps() {\n    _describe 'experiments' \"($(dvc exp list -q --all-commits --names-only))\"\n}\n\n_dvc_compadd_remotes() {\n    _describe 'remotes' \"($(dvc remote list | cut -d' ' -f1))\"\n}\n\n_dvc_compadd_config_vars() {\n    _describe 'config_vars' 
_dvc_config_vars\n}\n\"\"\"\n\nPREAMBLE = {\n    \"bash\": BASH_PREAMBLE,\n    \"zsh\": ZSH_PREAMBLE,\n}\n\nFILE = shtab.FILE\nDIR = shtab.DIRECTORY\nDVC_FILE = {\"bash\": \"_dvc_compgen_DVCFiles\", \"zsh\": \"_dvc_compadd_DVCFiles\"}\nSTAGE = {\"bash\": \"_dvc_compgen_stages\", \"zsh\": \"_dvc_compadd_stages\"}\nDVCFILES_AND_STAGE = {\n    \"bash\": \"_dvc_compgen_stages_and_files\",\n    \"zsh\": \"_dvc_compadd_stages_and_files\",\n}\nEXPERIMENT = {\"bash\": \"_dvc_compgen_exps\", \"zsh\": \"_dvc_compadd_exps\"}\nREMOTE = {\"bash\": \"_dvc_compgen_remotes\", \"zsh\": \"_dvc_compadd_remotes\"}\nCONFIG_VARS = {\"bash\": \"_dvc_compgen_config_vars\", \"zsh\": \"_dvc_compadd_config_vars\"}\n\n\ndef get_preamble() -> dict[str, str]:\n    from dvc.config_schema import config_vars_for_completion\n\n    ret: dict[str, str] = {}\n    config_vars = list(config_vars_for_completion())\n\n    nl = \"\\n\\t\".expandtabs(4)\n    config_vars_arr = f\"\"\"\n_dvc_config_vars=(\n    {nl.join(config_vars)}\n)\n\"\"\"\n    for shell, preamble in PREAMBLE.items():\n        ret[shell] = config_vars_arr + preamble\n    return ret\n"
  },
  {
    "path": "dvc/cli/formatter.py",
    "content": "import argparse\n\n\nclass HelpFormatter(argparse.HelpFormatter):\n    def _get_default_metavar_for_optional(self, action: argparse.Action) -> str:\n        return action.dest\n\n\nclass RawTextHelpFormatter(HelpFormatter, argparse.RawTextHelpFormatter):\n    pass\n\n\nclass RawDescriptionHelpFormatter(HelpFormatter, argparse.RawDescriptionHelpFormatter):\n    pass\n"
  },
  {
    "path": "dvc/cli/parser.py",
    "content": "\"\"\"Main parser for the dvc cli.\"\"\"\n\nimport argparse\nimport os\nfrom functools import lru_cache\n\nfrom dvc import __version__\nfrom dvc.commands import (\n    add,\n    artifacts,\n    cache,\n    check_ignore,\n    checkout,\n    commit,\n    completion,\n    config,\n    daemon,\n    dag,\n    data,\n    data_sync,\n    dataset,\n    destroy,\n    diff,\n    du,\n    experiments,\n    freeze,\n    gc,\n    get,\n    get_url,\n    git_hook,\n    imp,\n    imp_db,\n    imp_url,\n    init,\n    install,\n    ls,\n    ls_url,\n    metrics,\n    move,\n    params,\n    plots,\n    queue,\n    remote,\n    remove,\n    repro,\n    root,\n    stage,\n    studio,\n    unprotect,\n    update,\n    version,\n)\nfrom dvc.log import logger\n\nfrom . import DvcParserError, formatter\n\nlogger = logger.getChild(__name__)\n\nCOMMANDS = [\n    add,\n    artifacts,\n    cache,\n    check_ignore,\n    checkout,\n    commit,\n    completion,\n    config,\n    daemon,\n    dag,\n    data,\n    data_sync,\n    dataset,\n    destroy,\n    diff,\n    du,\n    experiments,\n    freeze,\n    gc,\n    get,\n    get_url,\n    git_hook,\n    imp,\n    imp_db,\n    imp_url,\n    init,\n    install,\n    ls,\n    ls_url,\n    metrics,\n    move,\n    params,\n    plots,\n    queue,\n    remote,\n    remove,\n    repro,\n    root,\n    stage,\n    studio,\n    unprotect,\n    update,\n    version,\n]\n\n\ndef _find_parser(parser, cmd_cls):\n    defaults = parser._defaults\n    if not cmd_cls or cmd_cls == defaults.get(\"func\"):\n        parser.print_help()\n        raise DvcParserError\n\n    actions = parser._actions\n    for action in actions:\n        if not isinstance(action.choices, dict):\n            # NOTE: we are only interested in subparsers\n            continue\n        for subparser in action.choices.values():\n            _find_parser(subparser, cmd_cls)\n\n\nclass DvcParser(argparse.ArgumentParser):\n    \"\"\"Custom parser class for dvc CLI.\"\"\"\n\n  
  def error(self, message, cmd_cls=None):\n        logger.error(message)\n        _find_parser(self, cmd_cls)\n\n    def parse_args(self, args=None, namespace=None):\n        # NOTE: overriding to provide a more granular help message.\n        # E.g. `dvc plots diff --bad-flag` would result in a `dvc plots diff`\n        # help message instead of generic `dvc` usage.\n        args, argv = self.parse_known_args(args, namespace)\n        if argv:\n            msg = \"unrecognized arguments: %s\"\n            self.error(msg % \" \".join(argv), getattr(args, \"func\", None))\n        return args\n\n\ndef get_parent_parser():\n    \"\"\"Create instances of a parser containing common arguments shared among\n    all the commands.\n\n    When overwriting `-q` or `-v`, you need to instantiate a new object\n    in order to prevent some weird behavior.\n    \"\"\"\n    from dvc._debug import add_debugging_flags\n\n    parent_parser = argparse.ArgumentParser(add_help=False)\n    log_level_group = parent_parser.add_mutually_exclusive_group()\n    log_level_group.add_argument(\n        \"-q\", \"--quiet\", action=\"count\", default=0, help=\"Be quiet.\"\n    )\n    log_level_group.add_argument(\n        \"-v\", \"--verbose\", action=\"count\", default=0, help=\"Be verbose.\"\n    )\n    add_debugging_flags(parent_parser)\n\n    return parent_parser\n\n\n@lru_cache(maxsize=1)\ndef get_main_parser():\n    parent_parser = get_parent_parser()\n\n    # Main parser\n    desc = \"Data Version Control\"\n    parser = DvcParser(\n        prog=\"dvc\",\n        description=desc,\n        parents=[parent_parser],\n        formatter_class=formatter.RawTextHelpFormatter,\n        add_help=False,\n    )\n\n    # NOTE: We are doing this to capitalize help message.\n    # Unfortunately, there is no easier and clearer way to do it,\n    # as adding this argument in get_parent_parser() either in\n    # log_level_group or on parent_parser itself will cause unexpected error.\n    
parser.add_argument(\n        \"-h\",\n        \"--help\",\n        action=\"help\",\n        default=argparse.SUPPRESS,\n        help=\"Show this help message and exit.\",\n    )\n\n    parser.add_argument(\n        \"-V\",\n        \"--version\",\n        action=\"version\",\n        version=__version__,\n        help=\"Show program's version.\",\n    )\n\n    parser.add_argument(\n        \"--cd\",\n        default=os.path.curdir,\n        metavar=\"<path>\",\n        help=\"Change to directory before executing.\",\n        type=str,\n    )\n\n    parser.add_argument(\n        \"--wait-for-lock\",\n        action=\"store_true\",\n        default=False,\n        help=\"Wait for the lock if it is already held by another process, instead of\"\n        \" failing immediately.\",\n    )\n\n    # Sub commands\n    subparsers = parser.add_subparsers(\n        title=\"Available Commands\",\n        metavar=\"command\",\n        dest=\"cmd\",\n        help=\"Use `dvc command --help` for command-specific help.\",\n        required=True,\n    )\n\n    for cmd in COMMANDS:\n        cmd.add_parser(subparsers, parent_parser)\n\n    return parser\n"
  },
  {
    "path": "dvc/cli/utils.py",
    "content": "import argparse\n\n\nclass DictAction(argparse.Action):\n    def __init__(self, *args, **kwargs):\n        kwargs.setdefault(\"metavar\", \"<name>=<value>\")\n        super().__init__(*args, **kwargs)\n\n    def __call__(self, parser, args, values, option_string=None):  # noqa: ARG002\n        d = getattr(args, self.dest) or {}\n\n        if isinstance(values, list):\n            kvs = values\n        else:\n            kvs = [values]\n\n        for kv in kvs:\n            key, value = kv.split(\"=\", 1)\n            if not value:\n                raise argparse.ArgumentError(\n                    self,\n                    f'Could not parse argument \"{values}\" as k1=v1 k2=v2 ... format',\n                )\n            d[key] = value\n\n        setattr(args, self.dest, d)\n\n\ndef append_doc_link(help_message, path):\n    from dvc.utils import format_link\n\n    if not path:\n        return help_message\n    doc_base = \"https://man.dvc.org/\"\n    return f\"{help_message}\\nDocumentation: {format_link(doc_base + path)}\"\n\n\ndef hide_subparsers_from_help(subparsers):\n    # metavar needs to be explicitly set in order to hide subcommands\n    # from the 'positional arguments' choices list\n    # see: https://bugs.python.org/issue22848\n    # Need to set `add_help=False`, but avoid setting `help`\n    # (not even to `argparse.SUPPRESS`).\n    # NOTE: The argument is the parent subparser, not the subcommand parser.\n    cmds = [cmd for cmd, parser in subparsers.choices.items() if parser.add_help]\n    subparsers.metavar = \"{{{}}}\".format(\",\".join(cmds))\n"
  },
  {
    "path": "dvc/commands/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/commands/add.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdAdd(CmdBase):\n    def validate_args(self) -> None:\n        from dvc.exceptions import InvalidArgumentError\n\n        args = self.args\n        invalid_opt = None\n\n        if args.to_remote or args.out:\n            message = \"{option} can't be used with \"\n            message += \"--to-remote\" if args.to_remote else \"--out\"\n            if len(args.targets) != 1:\n                invalid_opt = \"multiple targets\"\n            elif args.glob:\n                invalid_opt = \"--glob option\"\n            elif args.no_commit:\n                invalid_opt = \"--no-commit option\"\n        else:\n            message = \"{option} can't be used without --to-remote\"\n            if args.remote:\n                invalid_opt = \"--remote\"\n            elif args.remote_jobs:\n                invalid_opt = \"--remote-jobs\"\n\n        if invalid_opt is not None:\n            raise InvalidArgumentError(message.format(option=invalid_opt))\n\n    def run(self):\n        from dvc.exceptions import DvcException, InvalidArgumentError\n\n        try:\n            self.validate_args()\n        except InvalidArgumentError:\n            logger.exception(\"\")\n            return 1\n\n        try:\n            self.repo.add(\n                self.args.targets,\n                no_commit=self.args.no_commit,\n                glob=self.args.glob,\n                out=self.args.out,\n                remote=self.args.remote,\n                to_remote=self.args.to_remote,\n                remote_jobs=self.args.remote_jobs,\n                force=self.args.force,\n                relink=self.args.relink,\n            )\n        except FileNotFoundError:\n            logger.exception(\"\")\n            return 1\n        except DvcException:\n            
logger.exception(\"\")\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    ADD_HELP = \"Track data files or directories with DVC.\"\n\n    parser = subparsers.add_parser(\n        \"add\",\n        parents=[parent_parser],\n        description=append_doc_link(ADD_HELP, \"add\"),\n        help=ADD_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    parser.add_argument(\n        \"--no-commit\",\n        action=\"store_true\",\n        default=False,\n        help=\"Don't put files/directories into cache.\",\n    )\n    parser.add_argument(\n        \"--glob\",\n        action=\"store_true\",\n        default=False,\n        help=\"Allows targets containing shell-style wildcards.\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--out\",\n        help=\"Destination path to put files to.\",\n        metavar=\"<path>\",\n    )\n    parser.add_argument(\n        \"--to-remote\",\n        action=\"store_true\",\n        default=False,\n        help=\"Download it directly to the remote\",\n    )\n    parser.add_argument(\n        \"-r\",\n        \"--remote\",\n        help=\"Remote storage to download to\",\n        metavar=\"<name>\",\n    ).complete = completion.REMOTE\n    parser.add_argument(\n        \"--remote-jobs\",\n        type=int,\n        help=(\n            \"Only used along with '--to-remote'. \"\n            \"Number of jobs to run simultaneously \"\n            \"when pushing data to remote.\"\n            \"The default value is 4 * cpu_count(). 
\"\n        ),\n        metavar=\"<number>\",\n    )\n    parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Override local file or folder if exists.\",\n    )\n    parser.add_argument(\n        \"--no-relink\",\n        dest=\"relink\",\n        action=\"store_false\",\n        help=\"Don't recreate links from cache to workspace.\",\n    )\n    parser.set_defaults(relink=True)\n    parser.add_argument(\n        \"targets\", nargs=\"+\", help=\"Input files/directories to add.\"\n    ).complete = completion.FILE\n    parser.set_defaults(func=CmdAdd)\n"
  },
  {
    "path": "dvc/commands/artifacts.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdArtifactsGet(CmdBaseNoRepo):\n    def run(self):\n        from dvc.repo.artifacts import Artifacts\n        from dvc.scm import CloneError\n        from dvc.ui import ui\n\n        if self.args.show_url:\n            return self._show_url()\n\n        try:\n            count, out = Artifacts.get(\n                self.args.url,\n                name=self.args.name,\n                version=self.args.rev,\n                stage=self.args.stage,\n                force=self.args.force,\n                config=self.args.config,\n                remote=self.args.remote,\n                remote_config=self.args.remote_config,\n                out=self.args.out,\n            )\n            ui.write(f\"Downloaded {count} file(s) to '{out}'\")\n            return 0\n        except CloneError:\n            logger.exception(\"failed to get '%s'\", self.args.name)\n            return 1\n        except DvcException:\n            logger.exception(\n                \"failed to get '%s' from '%s'\", self.args.name, self.args.url\n            )\n            return 1\n\n    def _show_url(self):\n        from dvc.api import artifacts_show, get_url\n        from dvc.ui import ui\n\n        artifact = artifacts_show(\n            self.args.name,\n            version=self.args.rev,\n            stage=self.args.stage,\n            repo=self.args.url,\n        )\n\n        url = get_url(\n            artifact[\"path\"],\n            repo=self.args.url,\n            rev=artifact[\"rev\"],\n            remote=self.args.remote,\n            remote_config=self.args.remote_config,\n        )\n        ui.write(url, force=True)\n\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    ARTIFACTS_HELP = \"DVC 
model registry artifact commands.\"\n\n    artifacts_parser = subparsers.add_parser(\n        \"artifacts\",\n        parents=[parent_parser],\n        description=append_doc_link(ARTIFACTS_HELP, \"artifacts\"),\n        help=ARTIFACTS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    artifacts_subparsers = artifacts_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc artifacts CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    ARTIFACTS_GET_HELP = \"Download an artifact from a DVC project.\"\n    get_parser = artifacts_subparsers.add_parser(\n        \"get\",\n        parents=[parent_parser],\n        description=append_doc_link(ARTIFACTS_GET_HELP, \"artifacts/get\"),\n        help=ARTIFACTS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    get_parser.add_argument(\"url\", help=\"Location of DVC repository to download from\")\n    get_parser.add_argument(\n        \"name\", help=\"Name of artifact in the repository\"\n    ).complete = completion.FILE\n    get_parser.add_argument(\n        \"--rev\",\n        nargs=\"?\",\n        help=\"Artifact version\",\n        metavar=\"<version>\",\n    )\n    get_parser.add_argument(\n        \"--stage\",\n        nargs=\"?\",\n        help=\"Artifact stage\",\n        metavar=\"<stage>\",\n    )\n    get_parser.add_argument(\n        \"-o\",\n        \"--out\",\n        nargs=\"?\",\n        help=\"Destination path to download artifact to\",\n        metavar=\"<path>\",\n    ).complete = completion.DIR\n    get_parser.add_argument(\n        \"--show-url\",\n        action=\"store_true\",\n        help=(\n            \"Print the storage location (URL) the target data would be \"\n            \"downloaded from, and exit.\"\n        ),\n    )\n    get_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        help=(\n            \"Number of jobs to run simultaneously. 
\"\n            \"The default value is 4 * cpu_count(). \"\n        ),\n        metavar=\"<number>\",\n    )\n    get_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Override local file or folder if exists.\",\n    )\n    get_parser.add_argument(\n        \"--config\",\n        type=str,\n        help=(\n            \"Path to a config file that will be merged with the config \"\n            \"in the target repository.\"\n        ),\n    )\n    get_parser.add_argument(\n        \"--remote\",\n        type=str,\n        help=(\n            \"Remote name to set as a default in the target repository \"\n            \"(only applicable when downloading from DVC remote).\"\n        ),\n    ).complete = completion.REMOTE\n    get_parser.add_argument(\n        \"--remote-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=(\n            \"Remote config options to merge with a remote's config (default or one \"\n            \"specified by '--remote') in the target repository (only applicable \"\n            \"when downloading from DVC remote).\"\n        ),\n    )\n    get_parser.set_defaults(func=CmdArtifactsGet)\n"
  },
  {
    "path": "dvc/commands/cache.py",
    "content": "import argparse\nimport os\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.config import CmdConfig\nfrom dvc.ui import ui\n\n\nclass CmdCacheDir(CmdConfig):\n    def run(self):\n        if self.args.value is None and not self.args.unset:\n            from dvc.config import ConfigError\n\n            if self.args.level:\n                conf = self.config.read(level=self.args.level)\n            else:\n                # Use merged config with default values\n                conf = self.config\n            try:\n                self._check(conf, False, \"cache\", \"dir\")\n                path = conf[\"cache\"][\"dir\"]\n            except ConfigError:\n                if not self.config.dvc_dir or self.args.level:\n                    raise\n                path = os.path.join(self.config.dvc_dir, \"cache\")\n            ui.write(path)\n            return 0\n        with self.config.edit(level=self.args.level) as conf:\n            if self.args.unset:\n                self._check(conf, False, \"cache\", \"dir\")\n                del conf[\"cache\"][\"dir\"]\n            else:\n                self._check(conf, False, \"cache\")\n                conf[\"cache\"][\"dir\"] = self.args.value\n        return 0\n\n\nclass CmdCacheMigrate(CmdBase):\n    def run(self):\n        from dvc.cachemgr import migrate_2_to_3\n        from dvc.repo.commit import commit_2_to_3\n\n        migrate_2_to_3(self.repo, dry=self.args.dry)\n        if self.args.dvc_files:\n            commit_2_to_3(self.repo, dry=self.args.dry)\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    from dvc.commands.config import parent_config_parser\n\n    CACHE_HELP = \"Manage cache settings.\"\n\n    cache_parser = subparsers.add_parser(\n        \"cache\",\n        parents=[parent_parser],\n        description=append_doc_link(CACHE_HELP, \"cache\"),\n        help=CACHE_HELP,\n    
    formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    cache_subparsers = cache_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc cache CMD --help` for command-specific help.\",\n        required=True,\n    )\n\n    parent_cache_config_parser = argparse.ArgumentParser(\n        add_help=False, parents=[parent_config_parser]\n    )\n    CACHE_DIR_HELP = \"Configure cache directory location.\"\n\n    cache_dir_parser = cache_subparsers.add_parser(\n        \"dir\",\n        parents=[parent_parser, parent_cache_config_parser],\n        description=append_doc_link(CACHE_HELP, \"cache/dir\"),\n        help=CACHE_DIR_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    cache_dir_parser.add_argument(\n        \"-u\",\n        \"--unset\",\n        default=False,\n        action=\"store_true\",\n        help=\"Unset option.\",\n    )\n    cache_dir_parser.add_argument(\n        \"value\",\n        help=(\n            \"Path to cache directory. Relative paths are resolved relative \"\n            \"to the current directory and saved to config relative to the \"\n            \"config file location. 
If no path is provided, it returns the \"\n            \"current cache directory.\"\n        ),\n        nargs=\"?\",\n    ).complete = completion.DIR\n    cache_dir_parser.set_defaults(func=CmdCacheDir)\n\n    CACHE_MIGRATE_HELP = \"Migrate cached files to the DVC 3.0 cache location.\"\n    cache_migrate_parser = cache_subparsers.add_parser(\n        \"migrate\",\n        parents=[parent_parser],\n        description=append_doc_link(CACHE_HELP, \"cache/migrate\"),\n        help=CACHE_MIGRATE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    cache_migrate_parser.add_argument(\n        \"--dvc-files\",\n        help=(\n            \"Migrate entries in all existing DVC files in the repository \"\n            \"to the DVC 3.0 format.\"\n        ),\n        action=\"store_true\",\n    )\n    cache_migrate_parser.add_argument(\n        \"--dry\",\n        help=(\n            \"Only print actions which would be taken without actually migrating \"\n            \"any data.\"\n        ),\n        action=\"store_true\",\n    )\n    cache_migrate_parser.set_defaults(func=CmdCacheMigrate)\n"
  },
  {
    "path": "dvc/commands/check_ignore.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.ui import ui\n\nif TYPE_CHECKING:\n    from dvc.ignore import CheckIgnoreResult\n\n\nclass CmdCheckIgnore(CmdBase):\n    def __init__(self, args):\n        super().__init__(args)\n        self.ignore_filter = self.repo.dvcignore\n\n    def _show_results(self, result: \"CheckIgnoreResult\"):\n        if not result.match and not self.args.details:\n            return None\n\n        if self.args.details:\n            pattern_infos = result.pattern_infos\n            patterns = [str(pi) for pi in pattern_infos]\n            if not patterns and self.args.non_matching:\n                patterns = [\"::\"]\n            if not self.args.all:\n                patterns = patterns[-1:]\n\n            for pattern in patterns:\n                ui.write(pattern, result.file, sep=\"\\t\")\n        else:\n            ui.write(result.file)\n        return bool(result.pattern_infos)\n\n    def _check_one_file(self, target):\n        result = self.ignore_filter.check_ignore(target)\n        return bool(self._show_results(result))\n\n    def _interactive_mode(self):\n        ret = 1\n        while True:\n            try:\n                target = input()\n            except (KeyboardInterrupt, EOFError):\n                break\n            if not target:\n                break\n            if self._check_one_file(target):\n                ret = 0\n        return ret\n\n    def _normal_mode(self):\n        ret = 1\n        for target in self.args.targets:\n            if self._check_one_file(target):\n                ret = 0\n        return ret\n\n    def _check_args(self):\n        from dvc.exceptions import DvcException\n\n        if not self.args.stdin and not self.args.targets:\n            raise DvcException(\"`targets` or `--stdin` needed\")\n\n        if self.args.stdin and self.args.targets:\n    
        raise DvcException(\"cannot have both `targets` and `--stdin`\")\n\n        if self.args.non_matching and not self.args.details:\n            raise DvcException(\"`--non-matching` is only valid with `--details`\")\n\n        if self.args.all and not self.args.details:\n            raise DvcException(\"`--all` is only valid with `--details`\")\n\n        if self.args.quiet and self.args.details:\n            raise DvcException(\"cannot use both `--details` and `--quiet`\")\n\n    def run(self):\n        self._check_args()\n        if self.args.stdin:\n            return self._interactive_mode()\n        return self._normal_mode()\n\n\ndef add_parser(subparsers, parent_parser):\n    import argparse\n\n    ADD_HELP = \"Check whether files or directories are excluded due to `.dvcignore`.\"\n\n    parser = subparsers.add_parser(\n        \"check-ignore\",\n        parents=[parent_parser],\n        description=append_doc_link(ADD_HELP, \"check-ignore\"),\n        help=ADD_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    parser.add_argument(\n        \"-d\",\n        \"--details\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show the exclude patterns along with each target path.\",\n    )\n    parser.add_argument(\n        \"-a\", \"--all\", action=\"store_true\", default=False, help=argparse.SUPPRESS\n    )\n    parser.add_argument(\n        \"-n\",\n        \"--non-matching\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Include the target paths which don't match any pattern \"\n            \"in the `--details` list.\"\n        ),\n    )\n    parser.add_argument(\n        \"--stdin\",\n        action=\"store_true\",\n        default=False,\n        help=\"Read paths from standard input instead of providing `targets`.\",\n    )\n    parser.add_argument(\n        \"targets\", nargs=\"*\", help=\"File or directory paths to check\"\n    ).complete = 
completion.FILE\n    parser.set_defaults(func=CmdCheckIgnore)\n"
  },
  {
    "path": "dvc/commands/checkout.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import CheckoutError\nfrom dvc.ui import ui\n\n\ndef log_changes(stats):\n    colors = {\n        \"modified\": \"yellow\",\n        \"added\": \"green\",\n        \"deleted\": \"red\",\n    }\n\n    for state, color in colors.items():\n        entries = stats.get(state)\n\n        if not entries:\n            continue\n\n        for entry in entries:\n            ui.write(f\"[{color}]{state[0].upper()}\", entry, styled=True, sep=\"\\t\")\n\n\nclass CmdCheckout(CmdBase):\n    def run(self):\n        from dvc.utils.humanize import get_summary\n\n        stats, exc = None, None\n        try:\n            result = self.repo.checkout(\n                targets=self.args.targets,\n                with_deps=self.args.with_deps,\n                force=self.args.force,\n                relink=self.args.relink,\n                recursive=self.args.recursive,\n                allow_missing=self.args.allow_missing,\n            )\n        except CheckoutError as _exc:\n            exc = _exc\n            result = exc.result\n\n        if self.args.summary:\n            default_message = \"No changes.\"\n            stats = result[\"stats\"]\n            assert isinstance(stats, dict)\n            msg = get_summary(stats.items())\n            ui.write(msg or default_message)\n        else:\n            result.pop(\"stats\", {})\n            log_changes(result)\n\n        if exc:\n            raise exc\n\n        if self.args.relink:\n            msg = \"Relinked successfully\"\n            ui.write(msg)\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    CHECKOUT_HELP = \"Checkout data files from cache.\"\n\n    checkout_parser = subparsers.add_parser(\n        \"checkout\",\n        parents=[parent_parser],\n        description=append_doc_link(CHECKOUT_HELP, \"checkout\"),\n        
help=CHECKOUT_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    checkout_parser.add_argument(\n        \"--summary\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show summary of the changes.\",\n    )\n    checkout_parser.add_argument(\n        \"-d\",\n        \"--with-deps\",\n        action=\"store_true\",\n        default=False,\n        help=\"Checkout all dependencies of the specified target.\",\n    )\n    checkout_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Checkout all subdirectories of the specified directory.\",\n    )\n    checkout_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Do not prompt when removing working directory files.\",\n    )\n    checkout_parser.add_argument(\n        \"--relink\",\n        action=\"store_true\",\n        default=False,\n        help=\"Recreate links or copies from cache to workspace.\",\n    )\n    checkout_parser.add_argument(\n        \"--allow-missing\",\n        action=\"store_true\",\n        default=False,\n        help=\"Ignore errors if some of the files or directories are missing.\",\n    )\n    checkout_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n        help=(\n            \"Limit command scope to these tracked files/directories, \"\n            \".dvc files and stage names.\"\n        ),\n    ).complete = completion.DVC_FILE\n    checkout_parser.set_defaults(func=CmdCheckout)\n"
  },
  {
    "path": "dvc/commands/commit.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdCommit(CmdBase):\n    def run(self):\n        from dvc.exceptions import DvcException\n\n        if not self.args.targets:\n            self.args.targets = [None]\n\n        for target in self.args.targets:\n            try:\n                self.repo.commit(\n                    target,\n                    with_deps=self.args.with_deps,\n                    recursive=self.args.recursive,\n                    force=self.args.force,\n                    relink=self.args.relink,\n                )\n            except DvcException:\n                logger.exception(\"failed to commit%s\", (\" \" + target) if target else \"\")\n                return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    COMMIT_HELP = (\n        \"Record changes to files or directories tracked by DVC\"\n        \" by storing the current versions in the cache.\"\n    )\n\n    commit_parser = subparsers.add_parser(\n        \"commit\",\n        parents=[parent_parser],\n        description=append_doc_link(COMMIT_HELP, \"commit\"),\n        help=COMMIT_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    commit_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Commit data even if hash values for dependencies or \"\n            \"outputs did not change.\"\n        ),\n    )\n    commit_parser.add_argument(\n        \"-d\",\n        \"--with-deps\",\n        action=\"store_true\",\n        default=False,\n        help=\"Commit all dependencies of the specified target.\",\n    )\n    commit_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Commit 
cache for subdirectories of the specified directory.\",\n    )\n    commit_parser.add_argument(\n        \"--no-relink\",\n        dest=\"relink\",\n        action=\"store_false\",\n        help=\"Don't recreate links from cache to workspace.\",\n    )\n    commit_parser.set_defaults(relink=True)\n    commit_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n        help=(\n            \"Limit command scope to these tracked files/directories, \"\n            \".dvc files and stage names.\"\n        ),\n    ).complete = completion.DVCFILES_AND_STAGE\n    commit_parser.set_defaults(func=CmdCommit)\n"
  },
  {
    "path": "dvc/commands/completion.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.completion import get_preamble\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nSUPPORTED_SHELLS = [\"bash\", \"zsh\"]\n\n\nclass CmdCompletion(CmdBaseNoRepo):\n    def run(self):\n        import shtab\n\n        shell = self.args.shell\n        parser = self.args.parser\n        script = shtab.complete(parser, shell=shell, preamble=get_preamble())\n        ui.write(script, force=True)\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    COMPLETION_HELP = \"Generate shell tab completion.\"\n    COMPLETION_DESCRIPTION = \"Prints out shell tab completion scripts.\"\n    completion_parser = subparsers.add_parser(\n        \"completion\",\n        parents=[parent_parser],\n        description=append_doc_link(COMPLETION_DESCRIPTION, \"completion\"),\n        help=COMPLETION_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    completion_parser.add_argument(\n        \"-s\",\n        \"--shell\",\n        help=\"Shell syntax for completions.\",\n        default=\"bash\",\n        choices=SUPPORTED_SHELLS,\n    )\n    completion_parser.set_defaults(func=CmdCompletion)\n"
  },
  {
    "path": "dvc/commands/config.py",
    "content": "import argparse\nimport os\n\nfrom funcy import set_in\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\nNAME_REGEX = r\"^(?P<top>(remote|db)\\.)?(?P<section>[^\\.]*)\\.(?P<option>[^\\.]*)$\"\n\n\ndef _name_type(value):\n    import re\n\n    match = re.match(NAME_REGEX, value)\n    if not match:\n        raise argparse.ArgumentTypeError(\n            \"name argument should look like remote.name.option or \"\n            \"db.name.option or section.option\"\n        )\n    top = match.group(\"top\")\n    return (\n        top.strip(\".\") if top else None,\n        match.group(\"section\").lower(),\n        match.group(\"option\").lower(),\n    )\n\n\nclass CmdConfig(CmdBaseNoRepo):\n    def __init__(self, args):\n        from dvc.config import Config\n\n        super().__init__(args)\n\n        self.config = Config.from_cwd(validate=False)\n\n    def run(self):\n        if self.args.show_origin and (self.args.value or self.args.unset):\n            logger.error(\n                \"--show-origin can't be used together with any of these \"\n                \"options: -u/--unset, value\"\n            )\n            return 1\n\n        if self.args.list:\n            return self._list()\n\n        if self.args.name is None:\n            logger.error(\"name argument is required\")\n            return 1\n\n        remote_or_db, section, opt = self.args.name\n\n        if self.args.value is None and not self.args.unset:\n            return self._get(remote_or_db, section, opt)\n        return self._set(remote_or_db, section, opt)\n\n    def _list(self):\n        if any((self.args.name, self.args.value, self.args.unset)):\n            logger.error(\n                \"-l/--list can't be used together with any of these \"\n                \"options: -u/--unset, name, value\"\n            
)\n            return 1\n\n        levels = self._get_appropriate_levels(self.args.level)\n\n        for level in levels:\n            conf = self.config.read(level)\n            prefix = self._config_file_prefix(self.args.show_origin, self.config, level)\n            configs = list(self._format_config(conf, prefix))\n            if configs:\n                ui.write(\"\\n\".join(configs))\n\n        return 0\n\n    def _get(self, remote_or_db, section, opt):\n        from dvc.config import ConfigError\n\n        levels = self._get_appropriate_levels(self.args.level)[::-1]\n\n        for level in levels:\n            conf = self.config.read(level)\n            if remote_or_db:\n                conf = conf[remote_or_db]\n\n            try:\n                self._check(conf, remote_or_db, section, opt)\n            except ConfigError:\n                if self.args.level:\n                    raise\n            else:\n                prefix = self._config_file_prefix(\n                    self.args.show_origin, self.config, level\n                )\n                ui.write(prefix, conf[section][opt], sep=\"\")\n                break\n\n        return 0\n\n    def _set(self, remote_or_db, section, opt):\n        with self.config.edit(self.args.level) as conf:\n            if remote_or_db:\n                conf = conf[remote_or_db]\n            if self.args.unset:\n                self._check(conf, remote_or_db, section, opt)\n                del conf[section][opt]\n            else:\n                conf.update(set_in(conf, [section, opt], self.args.value))\n\n        if self.args.name == \"cache.type\":\n            logger.warning(\n                \"You have changed the 'cache.type' option. 
This doesn't update\"\n                \" any existing workspace file links, but it can be done with:\"\n                \"\\n             dvc checkout --relink\"\n            )\n\n        return 0\n\n    def _check(self, conf, remote_or_db, section, opt=None):\n        from dvc.config import ConfigError\n\n        name = remote_or_db or \"section\"\n        if section not in conf:\n            raise ConfigError(f\"{name} '{section}' doesn't exist\")\n\n        if opt and opt not in conf[section]:\n            raise ConfigError(f\"option '{opt}' doesn't exist in {name} '{section}'\")\n\n    def _get_appropriate_levels(self, levels):\n        if levels:\n            self._validate_level_for_non_repo_operation(levels)\n            return [levels]\n        if self.config.dvc_dir is None:\n            return self.config.SYSTEM_LEVELS\n        return self.config.LEVELS\n\n    def _validate_level_for_non_repo_operation(self, level):\n        from dvc.config import ConfigError\n\n        if self.config.dvc_dir is None and level in self.config.REPO_LEVELS:\n            raise ConfigError(\"Not inside a DVC repo\")\n\n    @staticmethod\n    def _format_config(config, prefix=\"\"):\n        from dvc.utils.flatten import flatten\n\n        for key, value in flatten(config).items():\n            yield f\"{prefix}{key}={value}\"\n\n    @staticmethod\n    def _config_file_prefix(show_origin, config, level):\n        from dvc.repo import Repo\n\n        if not show_origin:\n            return \"\"\n\n        level = level or \"repo\"\n        fname = config.files[level]\n\n        if level in [\"local\", \"repo\"]:\n            fname = os.path.relpath(fname, start=Repo.find_root())\n\n        return fname + \"\\t\"\n\n\nparent_config_parser = argparse.ArgumentParser(add_help=False)\nlevel_group = parent_config_parser.add_mutually_exclusive_group()\nlevel_group.add_argument(\n    \"--global\",\n    dest=\"level\",\n    action=\"store_const\",\n    const=\"global\",\n    help=\"Use 
global config.\",\n)\nlevel_group.add_argument(\n    \"--system\",\n    dest=\"level\",\n    action=\"store_const\",\n    const=\"system\",\n    help=\"Use system config.\",\n)\nlevel_group.add_argument(\n    \"--project\",\n    dest=\"level\",\n    action=\"store_const\",\n    const=\"repo\",\n    help=\"Use project config (.dvc/config).\",\n)\nlevel_group.add_argument(\n    \"--local\",\n    dest=\"level\",\n    action=\"store_const\",\n    const=\"local\",\n    help=\"Use local config (.dvc/config.local).\",\n)\nparent_config_parser.set_defaults(level=None)\n\n\ndef add_parser(subparsers, parent_parser):\n    CONFIG_HELP = \"Get or set config options.\"\n\n    config_parser = subparsers.add_parser(\n        \"config\",\n        parents=[parent_config_parser, parent_parser],\n        description=append_doc_link(CONFIG_HELP, \"config\"),\n        help=CONFIG_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    config_parser.add_argument(\n        \"-u\",\n        \"--unset\",\n        default=False,\n        action=\"store_true\",\n        help=\"Unset option.\",\n    )\n    config_parser.add_argument(\n        \"name\",\n        nargs=\"?\",\n        type=_name_type,\n        help=\"Option name (section.option or remote.name.option).\",\n    ).complete = completion.CONFIG_VARS\n    config_parser.add_argument(\"value\", nargs=\"?\", help=\"Option value.\")\n    config_parser.add_argument(\n        \"-l\",\n        \"--list\",\n        default=False,\n        action=\"store_true\",\n        help=\"List all defined config values.\",\n    )\n    config_parser.add_argument(\n        \"--show-origin\",\n        default=False,\n        action=\"store_true\",\n        help=\"Show the source file containing each config value.\",\n    )\n    config_parser.set_defaults(func=CmdConfig)\n"
  },
  {
    "path": "dvc/commands/daemon.py",
    "content": "from dvc.cli import completion\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdDaemonBase(CmdBaseNoRepo):\n    pass\n\n\nclass CmdDaemonUpdater(CmdDaemonBase):\n    def run(self):\n        import os\n\n        from dvc.config import Config\n        from dvc.repo import Repo\n        from dvc.updater import Updater\n\n        root_dir = Repo.find_root()\n        dvc_dir = os.path.join(root_dir, Repo.DVC_DIR)\n        tmp_dir = os.path.join(dvc_dir, \"tmp\")\n        config = Config(dvc_dir, validate=False)\n        hardlink_lock = config.get(\"core\", {}).get(\"hardlink_lock\", False)\n        updater = Updater(tmp_dir, hardlink_lock=hardlink_lock)\n\n        logger.info(\"Starting updater to fetch the latest version\")\n        updater.fetch(detach=False)\n\n        return 0\n\n\nclass CmdDaemonAnalytics(CmdDaemonBase):\n    def run(self):\n        from dvc import analytics\n\n        logger.info(\"Sending analytics\")\n        analytics.send(self.args.target)\n\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    DAEMON_HELP = \"Service daemon.\"\n    daemon_parser = subparsers.add_parser(\n        \"daemon\",\n        parents=[parent_parser],\n        description=DAEMON_HELP,\n        add_help=False,\n    )\n\n    daemon_subparsers = daemon_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc daemon CMD --help` for command-specific help.\",\n        required=True,\n    )\n\n    DAEMON_UPDATER_HELP = \"Fetch latest available version.\"\n    daemon_updater_parser = daemon_subparsers.add_parser(\n        \"updater\",\n        parents=[parent_parser],\n        description=DAEMON_UPDATER_HELP,\n        help=DAEMON_UPDATER_HELP,\n    )\n    daemon_updater_parser.set_defaults(func=CmdDaemonUpdater)\n\n    DAEMON_ANALYTICS_HELP = \"Send dvc usage analytics.\"\n    daemon_analytics_parser = daemon_subparsers.add_parser(\n        \"analytics\",\n   
     parents=[parent_parser],\n        description=DAEMON_ANALYTICS_HELP,\n        help=DAEMON_ANALYTICS_HELP,\n    )\n    daemon_analytics_parser.add_argument(\n        \"target\", help=\"Analytics file.\"\n    ).complete = completion.FILE\n    daemon_analytics_parser.set_defaults(func=CmdDaemonAnalytics)\n"
  },
  {
    "path": "dvc/commands/dag.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.ui import ui\n\nif TYPE_CHECKING:\n    from networkx import DiGraph\n\n\ndef _show_ascii(graph: \"DiGraph\"):\n    from dvc.dagascii import draw\n    from dvc.repo.graph import get_pipelines\n\n    pipelines = get_pipelines(graph)\n\n    ret = []\n    for pipeline in pipelines:\n        ret.append(draw(pipeline.nodes, pipeline.edges))  # noqa: PERF401\n\n    return \"\\n\".join(ret)\n\n\ndef _quote_label(node):\n    label = str(node)\n    # Node names should not contain \":\" unless they are quoted with \"\".\n    # See: https://github.com/pydot/pydot/issues/258.\n    if label[0] != '\"' and label[-1] != '\"':\n        return f'\"{label}\"'\n    return label\n\n\ndef _show_dot(graph: \"DiGraph\"):\n    import io\n\n    import networkx as nx\n    from networkx.drawing.nx_pydot import write_dot\n\n    dot_file = io.StringIO()\n\n    nx.relabel_nodes(graph, _quote_label, copy=False)\n    write_dot(graph.reverse(), dot_file)\n    return dot_file.getvalue()\n\n\ndef _show_mermaid(graph, markdown: bool = False):\n    from dvc.repo.graph import get_pipelines\n\n    pipelines = get_pipelines(graph)\n\n    graph = \"flowchart TD\"\n\n    total_nodes = 0\n    for pipeline in pipelines:\n        node_ids = {}\n        nodes = sorted(str(x) for x in pipeline.nodes)\n        for node in nodes:\n            total_nodes += 1\n            node_id = f\"node{total_nodes}\"\n            graph += f'\\n\\t{node_id}[\"{node}\"]'\n            node_ids[node] = node_id\n        edges = sorted((str(a), str(b)) for b, a in pipeline.edges)\n        for a, b in edges:\n            graph += f\"\\n\\t{node_ids[str(a)]}-->{node_ids[str(b)]}\"\n\n    if markdown:\n        return f\"```mermaid\\n{graph}\\n```\"\n\n    return graph\n\n\ndef _collect_targets(repo, target, outs):\n    if not target:\n        return []\n\n    pairs = 
repo.stage.collect_granular(target)\n    if not outs:\n        return [stage.addressing for stage, _ in pairs]\n\n    targets = []\n\n    outs_trie = repo.index.outs_trie\n    for stage, path in pairs:\n        if not path:\n            targets.extend([str(out) for out in stage.outs])\n            continue\n\n        for out in outs_trie.itervalues(prefix=repo.fs.parts(path)):\n            targets.extend(str(out))\n\n    return targets\n\n\ndef _transform(index, outs):\n    import networkx as nx\n\n    from dvc.stage import Stage\n\n    def _relabel(node) -> str:\n        return node.addressing if isinstance(node, Stage) else str(node)\n\n    graph = index.outs_graph if outs else index.graph\n    return nx.relabel_nodes(graph, _relabel, copy=True)\n\n\ndef _filter(graph, targets, full):\n    import networkx as nx\n\n    if not targets:\n        return graph\n\n    new_graph = graph.copy()\n    if not full:\n        descendants = set()\n        for target in targets:\n            descendants.update(nx.descendants(graph, target))\n            descendants.add(target)\n        new_graph.remove_nodes_from(set(graph.nodes()) - descendants)\n\n    undirected = new_graph.to_undirected()\n    connected = set()\n    for target in targets:\n        connected.update(nx.node_connected_component(undirected, target))\n\n    new_graph.remove_nodes_from(set(new_graph.nodes()) - connected)\n    return new_graph\n\n\ndef _is_foreach_matrix_stage(node, join_string):\n    if node.endswith(\".dvc\"):\n        return False\n    return join_string in node\n\n\ndef _collapse_foreach_matrix_get_nodes(graph):\n    from dvc.parsing import JOIN\n\n    new_nodes = set()\n    nodes_to_remove = set()\n    for _node in list(graph.nodes):\n        if not _is_foreach_matrix_stage(_node, JOIN):\n            continue\n        nodes_to_remove.add(_node)\n        new_nodes.add(_node.split(JOIN)[0])\n    return new_nodes, nodes_to_remove\n\n\ndef _collapse_foreach_matrix_get_edges(graph):\n    from 
dvc.parsing import JOIN\n\n    new_edges = set()\n    edges_to_remove = set()\n    for _e1, _e2 in list(graph.edges):\n        _replace = False\n        _new_e1 = _e1\n        _new_e2 = _e2\n        if _is_foreach_matrix_stage(_e1, JOIN):\n            _new_e1 = _e1.split(JOIN)[0]\n            _replace = True\n        if _is_foreach_matrix_stage(_e2, JOIN):\n            _new_e2 = _e2.split(JOIN)[0]\n            _replace = True\n        if _replace:\n            edges_to_remove.add((_e1, _e2))\n            new_edges.add((_new_e1, _new_e2))\n    return new_edges, edges_to_remove\n\n\ndef _collapse_foreach_matrix(graph):\n    new_nodes, nodes_to_remove = _collapse_foreach_matrix_get_nodes(graph)\n    new_edges, edges_to_remove = _collapse_foreach_matrix_get_edges(graph)\n    new_graph = graph.copy()\n    new_graph.remove_edges_from(edges_to_remove)\n    new_graph.add_nodes_from(new_nodes)\n    new_graph.add_edges_from(new_edges)\n    new_graph.remove_nodes_from(nodes_to_remove)\n    return new_graph\n\n\ndef _build(repo, target=None, full=False, outs=False, collapse_foreach_matrix=False):\n    targets = _collect_targets(repo, target, outs)\n    graph = _transform(repo.index, outs)\n    filtered_graph = _filter(graph, targets, full)\n    if collapse_foreach_matrix:\n        filtered_graph = _collapse_foreach_matrix(filtered_graph)\n    return filtered_graph\n\n\nclass CmdDAG(CmdBase):\n    def run(self):\n        from dvc.exceptions import InvalidArgumentError\n\n        if self.args.outs and self.args.collapse_foreach_matrix:\n            raise InvalidArgumentError(\n                \"`--outs` and `--collapse-foreach-matrix` are mutually exclusive\"\n            )\n        graph = _build(\n            self.repo,\n            target=self.args.target,\n            full=self.args.full,\n            outs=self.args.outs,\n            collapse_foreach_matrix=self.args.collapse_foreach_matrix,\n        )\n\n        if self.args.dot:\n            ui.write(_show_dot(graph))\n   
     elif self.args.mermaid or self.args.markdown:\n            ui.write(_show_mermaid(graph, self.args.markdown))\n        else:\n            with ui.pager():\n                ui.write(_show_ascii(graph))\n\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    DAG_HELP = \"Visualize DVC project DAG.\"\n    dag_parser = subparsers.add_parser(\n        \"dag\",\n        parents=[parent_parser],\n        description=append_doc_link(DAG_HELP, \"dag\"),\n        help=DAG_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    dag_parser.add_argument(\n        \"--dot\",\n        action=\"store_true\",\n        default=False,\n        help=\"Print DAG with .dot format.\",\n    )\n    dag_parser.add_argument(\n        \"--mermaid\",\n        action=\"store_true\",\n        default=False,\n        help=\"Print DAG with mermaid format.\",\n    )\n    dag_parser.add_argument(\n        \"--md\",\n        action=\"store_true\",\n        default=False,\n        dest=\"markdown\",\n        help=\"Print DAG with mermaid format wrapped in Markdown block.\",\n    )\n    dag_parser.add_argument(\n        \"--collapse-foreach-matrix\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Collapse stages from each foreach/matrix definition into a single node.\"\n        ),\n    )\n    dag_parser.add_argument(\n        \"--full\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Show full DAG that the target belongs too, instead of \"\n            \"showing DAG consisting only of ancestors.\"\n        ),\n    )\n    dag_parser.add_argument(\n        \"-o\",\n        \"--outs\",\n        action=\"store_true\",\n        default=False,\n        help=\"Print output files instead of stages.\",\n    )\n    dag_parser.add_argument(\n        \"target\",\n        nargs=\"?\",\n        help=(\n            \"Stage name or output to show pipeline for. 
\"\n            \"Finds all stages in the workspace by default.\"\n        ),\n    )\n    dag_parser.set_defaults(func=CmdDAG)\n"
  },
  {
    "path": "dvc/commands/data.py",
    "content": "from typing import TYPE_CHECKING, ClassVar\n\nfrom funcy import chunks, compact, log_durations\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc.utils import colorize\n\nif TYPE_CHECKING:\n    from dvc.repo.data import GitInfo\n    from dvc.repo.data import Status as DataStatus\n\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdDataStatus(CmdBase):\n    COLORS: ClassVar[dict[str, str]] = {\n        \"not_in_remote\": \"red\",\n        \"not_in_cache\": \"red\",\n        \"committed\": \"green\",\n        \"uncommitted\": \"yellow\",\n        \"untracked\": \"cyan\",\n    }\n    LABELS: ClassVar[dict[str, str]] = {\n        \"not_in_remote\": \"Not in remote\",\n        \"not_in_cache\": \"Not in cache\",\n        \"committed\": \"DVC committed changes\",\n        \"uncommitted\": \"DVC uncommitted changes\",\n        \"untracked\": \"Untracked files\",\n        \"unchanged\": \"DVC unchanged files\",\n    }\n    HINTS: ClassVar[dict[str, tuple[str, ...]]] = {\n        \"not_in_remote\": ('use \"dvc push <file>...\" to upload files',),\n        \"not_in_cache\": ('use \"dvc fetch <file>...\" to download files',),\n        \"committed\": (\"git commit the corresponding dvc files to update the repo\",),\n        \"uncommitted\": (\n            'use \"dvc commit <file>...\" to track changes',\n            'use \"dvc checkout <file>...\" to discard changes',\n        ),\n        \"untracked\": (\n            (\n                'use \"git add <file> ...\" or '\n                '\"dvc add <file>...\" to commit to git or to dvc'\n            ),\n        ),\n        \"git_dirty\": (\n            'there are {}changes not tracked by dvc, use \"git status\" to see',\n        ),\n    }\n\n    @staticmethod\n    def _process_status(status: \"DataStatus\"):\n        \"\"\"Flatten stage status, and filter empty stage status 
contents.\"\"\"\n        for stage, stage_status in status.items():\n            if not stage_status or (\n                isinstance(stage_status, dict) and not any(stage_status.values())\n            ):\n                continue\n            yield stage, stage_status\n\n    @classmethod\n    def _show_status(cls, status: \"DataStatus\") -> int:  # noqa: C901\n        git_info: GitInfo = status.pop(\"git\")  # type: ignore[misc]\n        result = dict(cls._process_status(status))\n        if not result:\n            no_changes = \"No changes\"\n            if git_info.get(\"is_empty\", False):\n                no_changes += \" in an empty git repo\"\n            ui.write(f\"{no_changes}.\")\n\n        for idx, (stage, stage_status) in enumerate(result.items()):\n            if idx:\n                ui.write()\n\n            label = cls.LABELS.get(stage, stage.capitalize() + \" files\")\n            header = f\"{label}:\"\n            color = cls.COLORS.get(stage, None)\n\n            ui.write(header)\n            if hints := cls.HINTS.get(stage):\n                for hint in hints:\n                    ui.write(f\"  ({hint})\")\n\n            if isinstance(stage_status, dict):\n                items = [\n                    f\"{state}: \"\n                    + (\n                        \" -> \".join(change.values())\n                        if isinstance(change, dict)\n                        else change\n                    )\n                    for state, changes in stage_status.items()\n                    for change in changes\n                ]\n            else:\n                items = stage_status\n\n            tabs = \"\\t\".expandtabs(8)\n            for chunk in chunks(1000, items):\n                out = \"\\n\".join(tabs + item for item in chunk)\n                ui.write(colorize(out, color))\n\n        if (hints := cls.HINTS.get(\"git_dirty\")) and git_info.get(\"is_dirty\"):\n            for hint in hints:\n                message = 
hint.format(\"other \" if result else \"\")\n                ui.write(f\"[blue]({message})[/]\", styled=True)\n        return 0\n\n    def run(self) -> int:\n        with log_durations(logger.trace, \"in data_status\"):\n            status = self.repo.data_status(\n                targets=self.args.targets,\n                granular=self.args.granular,\n                untracked_files=self.args.untracked_files,\n                remote=self.args.remote,\n                not_in_remote=self.args.not_in_remote,\n                remote_refresh=self.args.remote_refresh,\n            )\n\n        if not self.args.unchanged:\n            status.pop(\"unchanged\")  # type: ignore[misc]\n        if self.args.untracked_files == \"no\":\n            status.pop(\"untracked\")\n        if self.args.json:\n            status.pop(\"git\")  # type: ignore[misc]\n            ui.write_json(compact(status))\n            return 0\n        return self._show_status(status)\n\n\ndef add_parser(subparsers, parent_parser):\n    data_help = \"Commands related to data management.\"\n    data_parser = subparsers.add_parser(\n        \"data\",\n        parents=[parent_parser],\n        description=append_doc_link(data_help, \"data/status\"),\n        help=data_help,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    data_subparsers = data_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc data CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    DATA_STATUS_HELP = (\n        \"Show changes between the last git commit, the dvcfiles and the workspace.\"\n    )\n    data_status_parser = data_subparsers.add_parser(\n        \"status\",\n        parents=[parent_parser],\n        description=append_doc_link(DATA_STATUS_HELP, \"data/status\"),\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n        help=DATA_STATUS_HELP,\n    )\n    data_status_parser.add_argument(\n        \"targets\",\n        
nargs=\"*\",\n        help=(\n            \"Limit command scope to these tracked files/directories, \"\n            \".dvc files and stage names.\"\n        ),\n    ).complete = completion.FILE  # type: ignore[attr-defined]\n    data_status_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show output in JSON format.\",\n    )\n    data_status_parser.add_argument(\n        \"--granular\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show granular file-level info for DVC-tracked directories.\",\n    )\n    data_status_parser.add_argument(\n        \"--unchanged\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show unmodified DVC-tracked files.\",\n    )\n    data_status_parser.add_argument(\n        \"--untracked-files\",\n        choices=[\"no\", \"all\"],\n        default=\"no\",\n        const=\"all\",\n        nargs=\"?\",\n        help=\"Show untracked files.\",\n    )\n    data_status_parser.add_argument(\n        \"-r\",\n        \"--remote\",\n        help=\"Remote storage to check (only applicable with --not-in-remote).\",\n        metavar=\"<name>\",\n    ).complete = completion.REMOTE\n    data_status_parser.add_argument(\n        \"--not-in-remote\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show files not in remote.\",\n    )\n    data_status_parser.add_argument(\n        \"--no-remote-refresh\",\n        dest=\"remote_refresh\",\n        action=\"store_false\",\n        help=\"Use cached remote index (don't check remote).\",\n    )\n    data_status_parser.set_defaults(func=CmdDataStatus)\n"
  },
  {
    "path": "dvc/commands/data_sync.py",
    "content": "import argparse\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdDataBase(CmdBase):\n    def log_summary(self, stats):\n        from dvc.ui import ui\n        from dvc.utils.humanize import get_summary\n\n        default_msg = \"Everything is up to date.\"\n\n        if not self.args.remote and not self.repo.config[\"core\"].get(\"remote\"):\n            ui.warn(\"No remote provided and no default remote set.\")\n\n        ui.write(get_summary(stats.items()) or default_msg)\n\n\nclass CmdDataPull(CmdDataBase):\n    def log_summary(self, result):\n        from dvc.commands.checkout import log_changes\n\n        stats = result.pop(\"stats\", {})\n        log_changes(result)\n        super().log_summary(stats)\n\n    def run(self):\n        from dvc.exceptions import CheckoutError, DvcException\n\n        try:\n            result = self.repo.pull(\n                targets=self.args.targets,\n                jobs=self.args.jobs,\n                remote=self.args.remote,\n                all_branches=self.args.all_branches,\n                all_tags=self.args.all_tags,\n                all_commits=self.args.all_commits,\n                with_deps=self.args.with_deps,\n                force=self.args.force,\n                recursive=self.args.recursive,\n                run_cache=self.args.run_cache,\n                glob=self.args.glob,\n                allow_missing=self.args.allow_missing,\n            )\n            self.log_summary(result)\n        except (CheckoutError, DvcException) as exc:\n            if result := getattr(exc, \"result\", {}):\n                self.log_summary(result)\n            logger.exception(\"failed to pull data from the cloud\")\n            return 1\n\n        return 0\n\n\nclass CmdDataPush(CmdDataBase):\n    def run(self):\n        from dvc.exceptions import 
DvcException\n\n        try:\n            processed_files_count = self.repo.push(\n                targets=self.args.targets,\n                jobs=self.args.jobs,\n                remote=self.args.remote,\n                all_branches=self.args.all_branches,\n                all_tags=self.args.all_tags,\n                all_commits=self.args.all_commits,\n                with_deps=self.args.with_deps,\n                recursive=self.args.recursive,\n                run_cache=self.args.run_cache,\n                glob=self.args.glob,\n            )\n            self.log_summary({\"pushed\": processed_files_count})\n        except DvcException:\n            logger.exception(\"failed to push data to the cloud\")\n            return 1\n        return 0\n\n\nclass CmdDataFetch(CmdDataBase):\n    def run(self):\n        from dvc.exceptions import DvcException\n\n        try:\n            processed_files_count = self.repo.fetch(\n                targets=self.args.targets,\n                jobs=self.args.jobs,\n                remote=self.args.remote,\n                all_branches=self.args.all_branches,\n                all_tags=self.args.all_tags,\n                all_commits=self.args.all_commits,\n                with_deps=self.args.with_deps,\n                recursive=self.args.recursive,\n                run_cache=self.args.run_cache,\n                max_size=self.args.max_size,\n                types=self.args.types,\n            )\n            self.log_summary({\"fetched\": processed_files_count})\n        except DvcException:\n            logger.exception(\"failed to fetch data from the cloud\")\n            return 1\n        return 0\n\n\ndef shared_parent_parser():\n    from dvc.cli.parser import get_parent_parser\n\n    # Parent parser used in pull/push/status\n    parent_parser = argparse.ArgumentParser(\n        add_help=False, parents=[get_parent_parser()]\n    )\n    parent_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n   
     help=(\n            \"Number of jobs to run simultaneously. \"\n            \"The default value is 4 * cpu_count(). \"\n        ),\n        metavar=\"<number>\",\n    )\n    parent_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n        help=(\n            \"Limit command scope to these tracked files/directories, \"\n            \".dvc files and stage names.\"\n        ),\n    ).complete = completion.DVC_FILE  # type: ignore[attr-defined]\n\n    return parent_parser\n\n\ndef add_parser(subparsers, _parent_parser):\n    from dvc.commands.status import CmdDataStatus\n\n    # Pull\n    PULL_HELP = \"Download tracked files or directories from remote storage.\"\n\n    pull_parser = subparsers.add_parser(\n        \"pull\",\n        parents=[shared_parent_parser()],\n        description=append_doc_link(PULL_HELP, \"pull\"),\n        help=PULL_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    pull_parser.add_argument(\n        \"-r\", \"--remote\", help=\"Remote storage to pull from\", metavar=\"<name>\"\n    ).complete = completion.REMOTE\n    pull_parser.add_argument(\n        \"-a\",\n        \"--all-branches\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fetch cache for all branches.\",\n    )\n    pull_parser.add_argument(\n        \"-T\",\n        \"--all-tags\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fetch cache for all tags.\",\n    )\n    pull_parser.add_argument(\n        \"-A\",\n        \"--all-commits\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fetch cache for all commits.\",\n    )\n    pull_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Do not prompt when removing working directory files.\",\n    )\n    pull_parser.add_argument(\n        \"-d\",\n        \"--with-deps\",\n        action=\"store_true\",\n        default=False,\n   
     help=\"Fetch cache for all dependencies of the specified target.\",\n    )\n    pull_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Pull cache for subdirectories of the specified directory.\",\n    )\n    pull_parser.add_argument(\n        \"--run-cache\",\n        action=argparse.BooleanOptionalAction,\n        default=False,\n        help=\"Fetch run history for all stages.\",\n    )\n    pull_parser.add_argument(\n        \"--glob\",\n        action=\"store_true\",\n        default=False,\n        help=argparse.SUPPRESS,\n    )\n    pull_parser.add_argument(\n        \"--allow-missing\",\n        action=\"store_true\",\n        default=False,\n        help=\"Ignore errors if some of the files or directories are missing.\",\n    )\n    pull_parser.set_defaults(func=CmdDataPull)\n\n    # Push\n    PUSH_HELP = \"Upload tracked files or directories to remote storage.\"\n\n    push_parser = subparsers.add_parser(\n        \"push\",\n        parents=[shared_parent_parser()],\n        description=append_doc_link(PUSH_HELP, \"push\"),\n        help=PUSH_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    push_parser.add_argument(\n        \"-r\", \"--remote\", help=\"Remote storage to push to\", metavar=\"<name>\"\n    ).complete = completion.REMOTE\n    push_parser.add_argument(\n        \"-a\",\n        \"--all-branches\",\n        action=\"store_true\",\n        default=False,\n        help=\"Push cache for all branches.\",\n    )\n    push_parser.add_argument(\n        \"-T\",\n        \"--all-tags\",\n        action=\"store_true\",\n        default=False,\n        help=\"Push cache for all tags.\",\n    )\n    push_parser.add_argument(\n        \"-A\",\n        \"--all-commits\",\n        action=\"store_true\",\n        default=False,\n        help=\"Push cache for all commits.\",\n    )\n    push_parser.add_argument(\n        \"-d\",\n        
\"--with-deps\",\n        action=\"store_true\",\n        default=False,\n        help=\"Push cache for all dependencies of the specified target.\",\n    )\n    push_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Push cache for subdirectories of specified directory.\",\n    )\n    push_parser.add_argument(\n        \"--run-cache\",\n        action=argparse.BooleanOptionalAction,\n        default=False,\n        help=\"Push run history for all stages.\",\n    )\n    push_parser.add_argument(\n        \"--glob\",\n        action=\"store_true\",\n        default=False,\n        help=\"Allows targets containing shell-style wildcards.\",\n    )\n    push_parser.set_defaults(func=CmdDataPush)\n\n    # Fetch\n    FETCH_HELP = \"Download files or directories from remote storage to the cache.\"\n\n    fetch_parser = subparsers.add_parser(\n        \"fetch\",\n        parents=[shared_parent_parser()],\n        description=append_doc_link(FETCH_HELP, \"fetch\"),\n        help=FETCH_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    fetch_parser.add_argument(\n        \"-r\", \"--remote\", help=\"Remote storage to fetch from\", metavar=\"<name>\"\n    ).complete = completion.REMOTE\n    fetch_parser.add_argument(\n        \"-a\",\n        \"--all-branches\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fetch cache for all branches.\",\n    )\n    fetch_parser.add_argument(\n        \"-T\",\n        \"--all-tags\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fetch cache for all tags.\",\n    )\n    fetch_parser.add_argument(\n        \"-A\",\n        \"--all-commits\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fetch cache for all commits.\",\n    )\n    fetch_parser.add_argument(\n        \"-d\",\n        \"--with-deps\",\n        action=\"store_true\",\n        
default=False,\n        help=\"Fetch cache for all dependencies of the specified target.\",\n    )\n    fetch_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fetch cache for subdirectories of specified directory.\",\n    )\n    fetch_parser.add_argument(\n        \"--run-cache\",\n        action=argparse.BooleanOptionalAction,\n        default=False,\n        help=\"Fetch run history for all stages.\",\n    )\n    fetch_parser.add_argument(\n        \"--max-size\",\n        type=int,\n        help=\"Fetch data files/directories that are each below specified size (bytes).\",\n    )\n    fetch_parser.add_argument(\n        \"--type\",\n        dest=\"types\",\n        action=\"append\",\n        default=[],\n        help=(\n            \"Only fetch data files/directories that are of a particular \"\n            \"type (metrics, plots).\"\n        ),\n        choices=[\"metrics\", \"plots\"],\n    )\n    fetch_parser.set_defaults(func=CmdDataFetch)\n\n    # Status\n    STATUS_HELP = \"Show changed stages, compare local cache and a remote storage.\"\n\n    status_parser = subparsers.add_parser(\n        \"status\",\n        parents=[shared_parent_parser()],\n        description=append_doc_link(STATUS_HELP, \"status\"),\n        help=STATUS_HELP,\n        conflict_handler=\"resolve\",\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    status_parser.add_argument(\n        \"-q\",\n        \"--quiet\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Suppresses all output.\"\n            \" Exit with 0 if pipelines are up to date, otherwise 1.\"\n        ),\n    )\n    status_parser.add_argument(\n        \"-c\",\n        \"--cloud\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show status of a local cache compared to a remote repository.\",\n    )\n    status_parser.add_argument(\n        
\"-r\",\n        \"--remote\",\n        help=\"Remote storage to compare local cache to\",\n        metavar=\"<name>\",\n    ).complete = completion.REMOTE\n    status_parser.add_argument(\n        \"-a\",\n        \"--all-branches\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Show status of a local cache compared to a remote repository \"\n            \"for all branches.\"\n        ),\n    )\n    status_parser.add_argument(\n        \"-T\",\n        \"--all-tags\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Show status of a local cache compared to a remote repository for all tags.\"\n        ),\n    )\n    status_parser.add_argument(\n        \"-A\",\n        \"--all-commits\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Show status of a local cache compared to a remote repository \"\n            \"for all commits.\"\n        ),\n    )\n    status_parser.add_argument(\n        \"-d\",\n        \"--with-deps\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show status for all dependencies of the specified target.\",\n    )\n    status_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show status of all stages in the specified directory.\",\n    )\n    status_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show status in JSON format.\",\n    )\n    status_parser.add_argument(\n        \"--no-updates\",\n        dest=\"check_updates\",\n        action=\"store_false\",\n        help=\"Ignore updates to imported data.\",\n    )\n\n    status_parser.set_defaults(func=CmdDataStatus)\n"
  },
  {
    "path": "dvc/commands/dataset.py",
    "content": "from typing import TYPE_CHECKING, Optional\n\nfrom dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from rich.text import Text\n\n    from dvc.repo.datasets import Dataset, FileInfo\n\nlogger = logger.getChild(__name__)\n\n\ndef diff_files(old: list[\"FileInfo\"], new: list[\"FileInfo\"]) -> dict[str, list[str]]:\n    old_files = {d.relpath: d for d in old}\n    new_files = {d.relpath: d for d in new}\n    rest = old_files.keys() & new_files.keys()\n    return {\n        \"added\": list(new_files.keys() - old_files.keys()),\n        \"deleted\": list(old_files.keys() - new_files.keys()),\n        \"modified\": [p for p in rest if new_files[p] != old_files[p]],\n    }\n\n\nclass CmdDatasetAdd(CmdBase):\n    @classmethod\n    def display(cls, name: str, dataset: \"Dataset\", action: str = \"Adding\"):\n        from dvc.ui import ui\n\n        assert dataset.lock\n\n        url = dataset.spec.url\n        ver: str = \"\"\n        if dataset.type == \"dc\":\n            ver = f\"v{dataset.lock.version}\"\n        if dataset.type == \"dvc\":\n            if dataset.lock.path:\n                url = f\"{url}:/{dataset.lock.path.lstrip('/')}\"\n            if rev := dataset.lock.rev:\n                ver = rev\n\n        ver_part: Optional[Text] = None\n        if ver:\n            ver_part = ui.rich_text.assemble(\" @ \", (ver, \"repr.number\"))\n        text = ui.rich_text.assemble(\"(\", (url, \"repr.url\"), ver_part or \"\", \")\")\n        ui.write(action, ui.rich_text(name, \"cyan\"), text, styled=True)\n\n    def run(self):\n        if not self.args.dvc and self.args.rev:\n            raise DvcException(\"--rev can't be used without --dvc\")\n        if not self.args.dvc and self.args.path:\n            raise DvcException(\"--path can't be used without --dvc\")\n\n        d = vars(self.args)\n        
for key in [\"dvc\", \"dc\", \"url\"]:\n            if url := d.pop(key, None):\n                d.update({\"type\": key, \"url\": url})\n                break\n\n        existing = self.repo.datasets.get(self.args.name)\n        with self.repo.scm_context:\n            if not self.args.force and existing:\n                path = self.repo.fs.relpath(existing.manifest_path)\n                raise DvcException(\n                    f\"{self.args.name} already exists in {path}, \"\n                    \"use --force to overwrite\"\n                )\n            dataset = self.repo.datasets.add(**d)\n            self.display(self.args.name, dataset)\n            return 0\n\n\nclass CmdDatasetUpdate(CmdBase):\n    def display(self, name: str, dataset: \"Dataset\", new: \"Dataset\"):\n        from dvc.commands.checkout import log_changes\n        from dvc.ui import ui\n\n        action = \"Updating\"\n        if not dataset.lock:\n            return CmdDatasetAdd.display(name, new, action)\n        if dataset == new:\n            ui.write(\"[yellow]Nothing to update[/]\", styled=True)\n            return None\n\n        assert new.lock\n\n        v: Optional[tuple[str, str]] = None\n        if dataset.type == \"dc\":\n            assert new.type == \"dc\"\n            if new.lock.version < dataset.lock.version:\n                action = \"Downgrading\"\n\n            v = (f\"v{dataset.lock.version}\", f\"v{new.lock.version}\")\n        if dataset.type == \"dvc\":\n            assert new.type == \"dvc\"\n            v = (f\"{dataset.lock.rev_lock[:9]}\", f\"{new.lock.rev_lock[:9]}\")\n\n        if v:\n            part = ui.rich_text.assemble(\n                (v[0], \"repr.number\"),\n                \" -> \",\n                (v[1], \"repr.number\"),\n            )\n        else:\n            part = ui.rich_text(dataset.spec.url, \"repr.url\")\n        changes = ui.rich_text.assemble(\"(\", part, \")\")\n        ui.write(action, ui.rich_text(name, \"cyan\"), 
changes, styled=True)\n        if dataset.type == \"url\":\n            assert new.type == \"url\"\n            stats = diff_files(dataset.lock.files, new.lock.files)\n            log_changes(stats)\n        return None\n\n    def run(self):\n        from difflib import get_close_matches\n\n        from dvc.repo.datasets import DatasetNotFoundError\n        from dvc.ui import ui\n\n        version = None\n        if self.args.rev:\n            try:\n                version = int(self.args.rev.lstrip(\"v\"))\n            except ValueError:\n                version = self.args.rev\n\n        d = vars(self.args) | {\"version\": version}\n        with self.repo.scm_context:\n            try:\n                dataset, new = self.repo.datasets.update(**d)\n            except DatasetNotFoundError:\n                logger.exception(\"\")\n                if matches := get_close_matches(self.args.name, self.repo.datasets):\n                    ui.write(\n                        \"did you mean?\",\n                        ui.rich_text(matches[0], \"cyan\"),\n                        stderr=True,\n                        styled=True,\n                    )\n                return 1\n            self.display(self.args.name, dataset, new)\n            return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    ds_parser = subparsers.add_parser(\n        \"dataset\",\n        aliases=[\"ds\"],\n        parents=[parent_parser],\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    ds_subparsers = ds_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc dataset CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    dataset_add_help = \"Add a dataset.\"\n    ds_add_parser = ds_subparsers.add_parser(\n        \"add\",\n        parents=[parent_parser],\n        description=append_doc_link(dataset_add_help, \"dataset/add\"),\n        formatter_class=formatter.RawTextHelpFormatter,\n        help=dataset_add_help,\n 
   )\n\n    url_exclusive_group = ds_add_parser.add_mutually_exclusive_group(required=True)\n    url_exclusive_group.add_argument(\n        \"--dc\", metavar=\"name\", help=\"Name of the DataChain dataset to track\"\n    )\n    url_exclusive_group.add_argument(\n        \"--dvc\",\n        help=\"Path or URL to a Git/DVC repository to track\",\n        metavar=\"url\",\n    )\n    url_exclusive_group.add_argument(\n        \"--url\",\n        help=\"\"\"\\\nURL of a cloud-versioned remote to track. Supported URLs:\n\ns3://bucket/key/path\ngs://bucket/path/to/file/or/dir\nazure://mycontainer/path\nremote://remote_name/path/to/file/or/dir (see `dvc remote`)\n\"\"\",\n    )\n    ds_add_parser.add_argument(\"name\", help=\"Name of the dataset to add\")\n    ds_add_parser.add_argument(\n        \"--rev\",\n        help=\"Git revision, e.g. SHA, branch, tag (only applicable with --dvc)\",\n        metavar=\"<commit>\",\n    )\n    ds_add_parser.add_argument(\n        \"--path\",\n        help=\"Path to a file or a directory within a git repository \"\n        \"(only applicable with --dvc)\",\n    )\n    ds_add_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Overwrite existing dataset\",\n    )\n    ds_add_parser.set_defaults(func=CmdDatasetAdd)\n\n    dataset_update_help = \"Update a dataset.\"\n    ds_update_parser = ds_subparsers.add_parser(\n        \"update\",\n        parents=[parent_parser],\n        description=append_doc_link(dataset_update_help, \"dataset/update\"),\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n        help=dataset_update_help,\n    )\n    ds_update_parser.add_argument(\"name\", help=\"Name of the dataset to update\")\n    ds_update_parser.add_argument(\n        \"--rev\",\n        \"--version\",\n        nargs=\"?\",\n        help=\"DataChain dataset version or Git revision (e.g. 
SHA, branch, tag)\",\n        metavar=\"<version>\",\n    )\n    ds_update_parser.set_defaults(func=CmdDatasetUpdate)\n"
  },
  {
    "path": "dvc/commands/destroy.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdDestroy(CmdBase):\n    def run(self):\n        from dvc.exceptions import DvcException\n        from dvc.ui import ui\n\n        try:\n            statement = (\n                \"This will destroy all information about your pipelines,\"\n                \" all data files, as well as cache in .dvc/cache.\"\n                \"\\n\"\n                \"Are you sure you want to continue?\"\n            )\n\n            if not self.args.force and not ui.confirm(statement):\n                raise DvcException(  # noqa: TRY301\n                    \"cannot destroy without a confirmation from the user.\"\n                    \" Use `-f` to force.\"\n                )\n\n            self.repo.destroy()\n        except DvcException:\n            logger.exception(\"failed to destroy DVC\")\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    DESTROY_HELP = \"Remove DVC files, local DVC config and data cache.\"\n\n    destroy_parser = subparsers.add_parser(\n        \"destroy\",\n        parents=[parent_parser],\n        description=append_doc_link(DESTROY_HELP, \"destroy\"),\n        help=DESTROY_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    destroy_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Force destruction.\",\n    )\n    destroy_parser.set_defaults(func=CmdDestroy)\n"
  },
  {
    "path": "dvc/commands/diff.py",
    "content": "import os\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\ndef _digest(checksum):\n    if isinstance(checksum, str):\n        return checksum[0:8]\n    return \"{}..{}\".format(checksum[\"old\"][0:8], checksum[\"new\"][0:8])\n\n\ndef _show_markdown(diff, show_hash=False, hide_missing=False):\n    headers = [\"Status\", \"Hash\", \"Path\"] if show_hash else [\"Status\", \"Path\"]\n    rows = []\n    statuses = [\"added\", \"deleted\", \"renamed\", \"modified\"]\n    if not hide_missing:\n        statuses.append(\"not in cache\")\n\n    for status in statuses:\n        entries = diff.get(status, [])\n        if not entries:\n            continue\n        for entry in entries:\n            path = entry[\"path\"]\n            if isinstance(path, dict):\n                path = f\"{path['old']} -> {path['new']}\"\n            if show_hash:\n                check_sum = _digest(entry.get(\"hash\", \"\"))\n                rows.append([status, check_sum, path])\n            else:\n                rows.append([status, path])\n\n    ui.table(rows, headers=headers, markdown=True)\n\n\nclass CmdDiff(CmdBase):\n    @staticmethod\n    def _show_diff(diff, hide_missing=False):\n        \"\"\"\n        Given a diff structure, generate a string of paths separated\n        by new lines and grouped together by their state.\n\n        A group's header is colored to enhance readability, for example:\n\n            Added:\n                another_file.txt\n                backup.tar\n                dir/\n                dir/1\n\n        An example of a diff formatted when entries contain hash:\n\n            Added:\n                d3b07384 foo\n\n            Modified:\n                c157a790..f98bf6f1 bar\n\n        If a group has no entries, it won't be included in the result.\n\n        At the 
bottom, include a summary with the number of files per state.\n        \"\"\"\n\n        colors = {\n            \"added\": \"green\",\n            \"modified\": \"yellow\",\n            \"deleted\": \"red\",\n            \"renamed\": \"green\",\n            \"not in cache\": \"yellow\",\n        }\n\n        summary = {}\n\n        states = [\"added\", \"deleted\", \"renamed\", \"modified\"]\n        if not hide_missing:\n            states.append(\"not in cache\")\n        for state in states:\n            summary[state] = 0\n            entries = diff[state]\n\n            if not entries:\n                continue\n\n            header = state.capitalize()\n            ui.write(f\"[{colors[state]}]{header}[/]:\", styled=True)\n\n            for entry in entries:\n                path = entry[\"path\"]\n                if isinstance(path, dict):\n                    path = f\"{path['old']} -> {path['new']}\"\n                checksum = entry.get(\"hash\")\n                summary[state] += 1 if not path.endswith(os.sep) else 0\n                ui.write(\n                    \"{space}{checksum}{separator}{path}\".format(\n                        space=\"    \",\n                        checksum=_digest(checksum) if checksum else \"\",\n                        separator=\"  \" if checksum else \"\",\n                        path=path,\n                    )\n                )\n\n            ui.write()\n\n        if not sum(summary.values()):\n            return\n\n        states_summary = \", \".join(\n            f\"{summary[state]} {state}\" for state in states if summary[state] > 0\n        )\n        ui.write(\"files summary:\", states_summary)\n\n    def run(self):\n        from dvc.exceptions import DvcException\n\n        try:\n            diff = self.repo.diff(self.args.a_rev, self.args.b_rev, self.args.targets)\n            show_hash = self.args.show_hash\n            hide_missing = self.args.b_rev or self.args.hide_missing\n            if hide_missing:\n  
              diff.pop(\"not in cache\", None)\n\n            for key, entries in diff.items():\n                entries = sorted(\n                    entries,\n                    key=lambda entry: (\n                        entry[\"path\"][\"old\"]\n                        if isinstance(entry[\"path\"], dict)\n                        else entry[\"path\"]\n                    ),\n                )\n                if not show_hash:\n                    for entry in entries:\n                        del entry[\"hash\"]\n                diff[key] = entries\n\n            if self.args.json:\n                ui.write_json(diff)\n            elif self.args.markdown:\n                _show_markdown(diff, show_hash, hide_missing)\n            elif diff:\n                self._show_diff(diff, hide_missing)\n\n        except DvcException:\n            logger.exception(\"failed to get diff\")\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    DIFF_DESCRIPTION = (\n        \"Show added, modified, or deleted data between commits in the DVC\"\n        \" repository, or between a commit and the workspace.\"\n    )\n    diff_parser = subparsers.add_parser(\n        \"diff\",\n        parents=[parent_parser],\n        description=append_doc_link(DIFF_DESCRIPTION, \"diff\"),\n        help=DIFF_DESCRIPTION,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    diff_parser.add_argument(\n        \"--targets\",\n        nargs=\"*\",\n        help=\"Specific DVC-tracked files to compare. 
Accepts one or more file paths.\",\n        metavar=\"<paths>\",\n    ).complete = completion.FILE\n    diff_parser.add_argument(\n        \"a_rev\",\n        help=\"Old Git commit to compare (defaults to HEAD)\",\n        nargs=\"?\",\n        default=\"HEAD\",\n    )\n    diff_parser.add_argument(\n        \"b_rev\",\n        help=\"New Git commit to compare (defaults to the current workspace)\",\n        nargs=\"?\",\n    )\n    diff_parser.add_argument(\n        \"--json\",\n        help=\"Format the output as JSON\",\n        action=\"store_true\",\n        default=False,\n    )\n    diff_parser.add_argument(\n        \"--show-hash\",\n        help=\"Display hash value for each entry\",\n        action=\"store_true\",\n        default=False,\n    )\n    diff_parser.add_argument(\n        \"--md\",\n        help=\"Show tabulated output in the Markdown format (GFM).\",\n        action=\"store_true\",\n        dest=\"markdown\",\n        default=False,\n    )\n    diff_parser.add_argument(\n        \"--hide-missing\",\n        help=\"Hide missing cache file status.\",\n        action=\"store_true\",\n    )\n    diff_parser.set_defaults(func=CmdDiff)\n"
  },
  {
    "path": "dvc/commands/du.py",
    "content": "import logging\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.ui import ui\n\nlogger = logging.getLogger(__name__)\n\n\nclass CmdDU(CmdBaseNoRepo):\n    def run(self):\n        from dvc.repo import Repo\n        from dvc.utils.humanize import naturalsize\n\n        entries = Repo.du(\n            self.args.url,\n            self.args.path,\n            rev=self.args.rev,\n            summarize=self.args.summarize,\n            config=self.args.config,\n            remote=self.args.remote,\n            remote_config=self.args.remote_config,\n        )\n        ui.table([(naturalsize(size), path) for path, size in entries])\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    DU_HELP = \"Show disk usage.\"\n    du_parser = subparsers.add_parser(\n        \"du\",\n        parents=[parent_parser],\n        description=append_doc_link(DU_HELP, \"du\"),\n        help=DU_HELP,\n        formatter_class=formatter.RawTextHelpFormatter,\n    )\n    du_parser.add_argument(\"url\", help=\"Location of DVC repository\")\n    du_parser.add_argument(\n        \"--rev\",\n        nargs=\"?\",\n        help=\"Git revision (e.g. 
SHA, branch, tag)\",\n        metavar=\"<commit>\",\n    )\n    du_parser.add_argument(\n        \"-s\",\n        \"--summarize\",\n        action=\"store_true\",\n        help=\"Show total disk usage.\",\n    )\n    du_parser.add_argument(\n        \"--config\",\n        type=str,\n        help=(\n            \"Path to a config file that will be merged with the config \"\n            \"in the target repository.\"\n        ),\n    )\n    du_parser.add_argument(\n        \"--remote\",\n        type=str,\n        help=\"Remote name to set as a default in the target repository.\",\n    ).complete = completion.REMOTE\n    du_parser.add_argument(\n        \"--remote-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=(\n            \"Remote config options to merge with a remote's config (default or one \"\n            \"specified by '--remote') in the target repository.\"\n        ),\n    )\n    du_parser.add_argument(\n        \"path\",\n        nargs=\"?\",\n        help=\"Path to directory within the repository\",\n    ).complete = completion.DIR\n    du_parser.set_defaults(func=CmdDU)\n"
  },
  {
    "path": "dvc/commands/experiments/__init__.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.utils import append_doc_link, hide_subparsers_from_help\nfrom dvc.commands.experiments import (\n    apply,\n    branch,\n    clean,\n    diff,\n    exec_run,\n    ls,\n    pull,\n    push,\n    queue_worker,\n    remove,\n    rename,\n    run,\n    save,\n    show,\n)\n\nSUB_COMMANDS = [\n    apply,\n    branch,\n    clean,\n    diff,\n    exec_run,\n    ls,\n    pull,\n    push,\n    queue_worker,\n    remove,\n    rename,\n    run,\n    save,\n    show,\n]\n\n\ndef add_parser(subparsers, parent_parser):\n    EXPERIMENTS_HELP = \"Commands to run and compare experiments.\"\n\n    experiments_parser = subparsers.add_parser(\n        \"experiments\",\n        parents=[parent_parser],\n        aliases=[\"exp\"],\n        description=append_doc_link(EXPERIMENTS_HELP, \"exp\"),\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n        help=EXPERIMENTS_HELP,\n    )\n\n    experiments_subparsers = experiments_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc experiments CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    for cmd in SUB_COMMANDS:\n        cmd.add_parser(experiments_subparsers, parent_parser)\n    hide_subparsers_from_help(experiments_subparsers)\n\n\ndef add_keep_selection_flag(experiments_subcmd_parser):\n    experiments_subcmd_parser.add_argument(\n        \"--keep\",\n        action=\"store_true\",\n        default=False,\n        help=\"Keep the selected experiments instead of removing them.\",\n    )\n\n\ndef add_rev_selection_flags(\n    experiments_subcmd_parser, command: str, default: bool = True\n):\n    experiments_subcmd_parser.add_argument(\n        \"-A\",\n        \"--all-commits\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            f\"{command} all experiments in the repository \"\n            \"(overrides `--rev` and `--num`).\"\n        ),\n    )\n    default_msg = \" (HEAD by 
default)\" if default else \"\"\n    msg = (\n        f\"{command} experiments derived from the specified `<commit>` as \"\n        f\"baseline{default_msg}.\"\n    )\n    experiments_subcmd_parser.add_argument(\n        \"--rev\",\n        type=str,\n        action=\"append\",\n        default=None,\n        help=msg,\n        metavar=\"<commit>\",\n    )\n    experiments_subcmd_parser.add_argument(\n        \"-n\",\n        \"--num\",\n        type=int,\n        default=1,\n        dest=\"num\",\n        metavar=\"<num>\",\n        help=(\n            f\"{command} experiments from the last `num` commits \"\n            \"(first parents) starting from the `--rev` baseline. \"\n            \"Give a negative value to include all first-parent commits \"\n            \"(similar to `git log -n`).\"\n        ),\n    )\n"
  },
  {
    "path": "dvc/commands/experiments/apply.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsApply(CmdBase):\n    def run(self):\n        if not self.args.force:\n            ui.write(\n                \"The --no-force option is deprecated and will be removed in a future\"\n                \" DVC release. To revert the result of 'exp apply', run:\\n\"\n                \"\\n\\tgit reset --hard\\n\"\n                \"\\tgit stash apply refs/exps/apply/stash\\n\"\n            )\n        self.repo.experiments.apply(self.args.experiment)\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXPERIMENTS_APPLY_HELP = \"Apply the changes from an experiment to your workspace.\"\n    experiments_apply_parser = experiments_subparsers.add_parser(\n        \"apply\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_APPLY_HELP, \"exp/apply\"),\n        help=EXPERIMENTS_APPLY_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    experiments_apply_parser.add_argument(\n        \"--no-force\",\n        action=\"store_false\",\n        dest=\"force\",\n        help=\"Fail if this command would overwrite conflicting changes.\",\n    )\n    experiments_apply_parser.add_argument(\n        \"experiment\", help=\"Experiment to be applied.\"\n    ).complete = completion.EXPERIMENT\n    experiments_apply_parser.set_defaults(func=CmdExperimentsApply)\n"
  },
  {
    "path": "dvc/commands/experiments/branch.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsBranch(CmdBase):\n    def run(self):\n        self.repo.experiments.branch(self.args.experiment, self.args.branch)\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXPERIMENTS_BRANCH_HELP = \"Promote an experiment to a Git branch.\"\n    experiments_branch_parser = experiments_subparsers.add_parser(\n        \"branch\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_BRANCH_HELP, \"exp/branch\"),\n        help=EXPERIMENTS_BRANCH_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    experiments_branch_parser.add_argument(\n        \"experiment\", help=\"Experiment to be promoted.\"\n    )\n    experiments_branch_parser.add_argument(\n        \"branch\",\n        nargs=\"?\",\n        default=None,\n        help=(\n            \"Optional name for the new Git branch. \"\n            \"Defaults to '{experiment-name}-branch'.\"\n        ),\n    )\n    experiments_branch_parser.set_defaults(func=CmdExperimentsBranch)\n"
  },
  {
    "path": "dvc/commands/experiments/clean.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsClean(CmdBase):\n    def run(self):\n        self.repo.experiments.clean()\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXPERIMENTS_CLEAN_HELP = \"Cleanup experiments temporary internal files.\"\n    experiments_clean_parser = experiments_subparsers.add_parser(\n        \"clean\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_CLEAN_HELP, \"exp/clean\"),\n        help=EXPERIMENTS_CLEAN_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    experiments_clean_parser.set_defaults(func=CmdExperimentsClean)\n"
  },
  {
    "path": "dvc/commands/experiments/diff.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.metrics import DEFAULT_PRECISION\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsDiff(CmdBase):\n    def run(self):\n        try:\n            diff = self.repo.experiments.diff(\n                a_rev=self.args.a_rev,\n                b_rev=self.args.b_rev,\n                all=self.args.all,\n                param_deps=self.args.param_deps,\n            )\n        except DvcException:\n            logger.exception(\"failed to show experiments diff\")\n            return 1\n\n        if self.args.json:\n            ui.write_json(diff)\n        elif diff:\n            from dvc.compare import show_diff\n\n            precision = self.args.precision or DEFAULT_PRECISION\n            diffs = [(\"metrics\", \"Metric\"), (\"params\", \"Param\")]\n            for idx, (key, title) in enumerate(diffs):\n                if idx:\n                    # we are printing tables even in `--quiet` mode\n                    # so we should also be printing the \"table\" separator\n                    ui.write(force=True)\n\n                show_diff(\n                    diff[key],\n                    title=title,\n                    markdown=self.args.markdown,\n                    no_path=self.args.no_path,\n                    on_empty_diff=\"diff not supported\",\n                    precision=precision if key == \"metrics\" else None,\n                    a_rev=self.args.a_rev,\n                    b_rev=self.args.b_rev,\n                )\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXPERIMENTS_DIFF_HELP = \"Show changes between experiments.\"\n\n    experiments_diff_parser = experiments_subparsers.add_parser(\n        \"diff\",\n        parents=[parent_parser],\n        
description=append_doc_link(EXPERIMENTS_DIFF_HELP, \"exp/diff\"),\n        help=EXPERIMENTS_DIFF_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    experiments_diff_parser.add_argument(\n        \"a_rev\", nargs=\"?\", help=\"Old experiment to compare (defaults to HEAD)\"\n    ).complete = completion.EXPERIMENT\n    experiments_diff_parser.add_argument(\n        \"b_rev\",\n        nargs=\"?\",\n        help=\"New experiment to compare (defaults to the current workspace)\",\n    ).complete = completion.EXPERIMENT\n    experiments_diff_parser.add_argument(\n        \"--all\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show unchanged metrics/params as well.\",\n    )\n    experiments_diff_parser.add_argument(\n        \"--param-deps\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show only params that are stage dependencies.\",\n    )\n    experiments_diff_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show output in JSON format.\",\n    )\n    experiments_diff_parser.add_argument(\n        \"--md\",\n        action=\"store_true\",\n        default=False,\n        dest=\"markdown\",\n        help=\"Show tabulated output in the Markdown format (GFM).\",\n    )\n    experiments_diff_parser.add_argument(\n        \"--no-path\",\n        action=\"store_true\",\n        default=False,\n        help=\"Don't show metric/param path.\",\n    )\n    experiments_diff_parser.add_argument(\n        \"--precision\",\n        type=int,\n        help=(\n            \"Round metrics/params to `n` digits precision after the decimal \"\n            f\"point. Rounds to {DEFAULT_PRECISION} digits by default.\"\n        ),\n        metavar=\"<n>\",\n    )\n    experiments_diff_parser.set_defaults(func=CmdExperimentsDiff)\n"
  },
  {
    "path": "dvc/commands/experiments/exec_run.py",
    "content": "from dvc.cli.command import CmdBaseNoRepo\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExecutorRun(CmdBaseNoRepo):\n    \"\"\"Run an experiment executor.\"\"\"\n\n    def run(self):\n        from dvc.repo.experiments.executor.base import BaseExecutor, ExecutorInfo\n        from dvc.utils.serialize import load_json\n\n        info = ExecutorInfo.from_dict(load_json(self.args.infofile))\n        BaseExecutor.reproduce(\n            info=info,\n            rev=\"\",\n            queue=None,\n            log_level=logger.getEffectiveLevel(),\n            infofile=self.args.infofile,\n            copy_paths=self.args.copy_paths,\n            message=self.args.message,\n        )\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXEC_RUN_HELP = \"Run an experiment executor.\"\n    exec_run_parser = experiments_subparsers.add_parser(\n        \"exec-run\",\n        parents=[parent_parser],\n        description=EXEC_RUN_HELP,\n        add_help=False,\n    )\n    exec_run_parser.add_argument(\n        \"--infofile\",\n        help=\"Path to executor info file\",\n        default=None,\n    )\n    exec_run_parser.add_argument(\n        \"-C\",\n        \"--copy-paths\",\n        action=\"append\",\n        default=[],\n        help=(\n            \"List of ignored or untracked paths to copy into the temp directory.\"\n            \" Only used if `--temp` or `--queue` is specified.\"\n        ),\n    )\n    exec_run_parser.add_argument(\n        \"-m\",\n        \"--message\",\n        type=str,\n        default=None,\n        help=\"Custom commit message to use when committing the experiment.\",\n    )\n    exec_run_parser.set_defaults(func=CmdExecutorRun)\n"
  },
  {
    "path": "dvc/commands/experiments/ls.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsList(CmdBase):\n    def run(self):\n        name_only = self.args.name_only\n        sha_only = self.args.sha_only\n        git_remote = self.args.git_remote\n        if sha_only and git_remote:\n            raise InvalidArgumentError(\"--sha-only not supported with git_remote.\")\n        exps = self.repo.experiments.ls(\n            all_commits=self.args.all_commits,\n            rev=self.args.rev,\n            num=self.args.num,\n            git_remote=git_remote,\n        )\n\n        from dvc.repo.experiments.utils import describe\n        from dvc.scm import Git\n\n        if name_only or sha_only:\n            names = {}\n        else:\n            assert isinstance(self.repo.scm, Git)\n            names = describe(\n                self.repo.scm,\n                (baseline for baseline in exps),\n                logger=logger,\n            )\n\n        for baseline in exps:\n            if not (name_only or sha_only):\n                name = names.get(baseline) or baseline[:7]\n                ui.write(f\"{name}:\")\n            for exp_name, rev in exps[baseline]:\n                if name_only:\n                    ui.write(exp_name)\n                elif sha_only:\n                    ui.write(rev)\n                elif rev:\n                    ui.write(f\"\\t{rev[:7]} [{exp_name}]\")\n                else:\n                    ui.write(f\"\\t{exp_name}\")\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    from . 
import add_rev_selection_flags\n\n    EXPERIMENTS_LIST_HELP = \"List local and remote experiments.\"\n    experiments_list_parser = experiments_subparsers.add_parser(\n        \"list\",\n        aliases=[\"ls\"],\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_LIST_HELP, \"exp/list\"),\n        help=EXPERIMENTS_LIST_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    add_rev_selection_flags(experiments_list_parser, \"List\")\n    display_group = experiments_list_parser.add_mutually_exclusive_group()\n    display_group.add_argument(\n        \"--name-only\",\n        \"--names-only\",\n        action=\"store_true\",\n        help=\"Only output experiment names (without SHAs or parent commits).\",\n    )\n    display_group.add_argument(\n        \"--sha-only\",\n        \"--shas-only\",\n        action=\"store_true\",\n        help=\"Only output experiment commit SHAs (without names or parent commits).\",\n    )\n    experiments_list_parser.add_argument(\n        \"git_remote\",\n        nargs=\"?\",\n        default=None,\n        help=(\n            \"Optional Git remote name or Git URL. \"\n            \"If provided, experiments from the specified Git repository \"\n            \" will be listed instead of local ones.\"\n        ),\n        metavar=\"<git_remote>\",\n    )\n    experiments_list_parser.set_defaults(func=CmdExperimentsList)\n"
  },
  {
    "path": "dvc/commands/experiments/pull.py",
    "content": "import argparse\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsPull(CmdBase):\n    def run(self):\n        pulled_exps = self.repo.experiments.pull(\n            self.args.git_remote,\n            self.args.experiment,\n            all_commits=self.args.all_commits,\n            rev=self.args.rev,\n            num=self.args.num,\n            force=self.args.force,\n            pull_cache=self.args.pull_cache,\n            dvc_remote=self.args.dvc_remote,\n            jobs=self.args.jobs,\n            run_cache=self.args.run_cache,\n        )\n\n        if pulled_exps:\n            ui.write(\n                f\"Pulled experiment '{pulled_exps}'\",\n                f\"from Git remote '{self.args.git_remote}'.\",\n            )\n        else:\n            ui.write(\"No experiments to pull.\")\n        if not self.args.pull_cache:\n            ui.write(\n                \"To pull cached outputs for this experiment\"\n                \"from DVC remote storage,\"\n                \"re-run this command without '--no-cache'.\"\n            )\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    from . 
import add_rev_selection_flags\n\n    EXPERIMENTS_PULL_HELP = \"Pull an experiment from a Git remote.\"\n    experiments_pull_parser = experiments_subparsers.add_parser(\n        \"pull\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_PULL_HELP, \"exp/pull\"),\n        help=EXPERIMENTS_PULL_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    add_rev_selection_flags(experiments_pull_parser, \"Pull\", True)\n    experiments_pull_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        help=\"Replace local experiment if it already exists.\",\n    )\n    experiments_pull_parser.add_argument(\n        \"--no-cache\",\n        action=\"store_false\",\n        dest=\"pull_cache\",\n        help=\"Do not pull cached outputs for this experiment from DVC remote storage.\",\n    )\n    experiments_pull_parser.add_argument(\n        \"-r\",\n        \"--remote\",\n        dest=\"dvc_remote\",\n        metavar=\"<name>\",\n        help=\"Name of the DVC remote to use when pulling cached outputs.\",\n    ).complete = completion.REMOTE\n    experiments_pull_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        metavar=\"<number>\",\n        help=(\n            \"Number of jobs to run simultaneously when pulling from DVC remote storage.\"\n        ),\n    )\n    experiments_pull_parser.add_argument(\n        \"--run-cache\",\n        action=argparse.BooleanOptionalAction,\n        default=False,\n        help=\"Pull run history for all stages.\",\n    )\n    experiments_pull_parser.add_argument(\n        \"git_remote\",\n        help=\"Git remote name or Git URL.\",\n        metavar=\"<git_remote>\",\n    )\n    experiments_pull_parser.add_argument(\n        \"experiment\",\n        nargs=\"*\",\n        default=None,\n        help=\"Experiments to pull.\",\n        metavar=\"<experiment>\",\n    )\n    
experiments_pull_parser.set_defaults(func=CmdExperimentsPull)\n"
  },
  {
    "path": "dvc/commands/experiments/push.py",
    "content": "import argparse\nfrom typing import Any\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsPush(CmdBase):\n    @staticmethod\n    def log_result(result: dict[str, Any], remote: str):\n        from dvc.utils import humanize\n\n        def join_exps(exps):\n            return humanize.join([f\"[bold]{e}[/]\" for e in exps])\n\n        if diverged_exps := result.get(\"diverged\"):\n            exps = join_exps(diverged_exps)\n            ui.error_write(\n                f\"[yellow]Local experiment {exps} has diverged \"\n                \"from remote experiment with the same name.\\n\"\n                \"To override the remote experiment re-run with '--force'.\",\n                styled=True,\n            )\n        if uptodate_exps := result.get(\"up_to_date\"):\n            exps = join_exps(uptodate_exps)\n            verb = \"are\" if len(uptodate_exps) > 1 else \"is\"\n            ui.write(\n                f\"Experiment {exps} {verb} up to date on Git remote {remote!r}.\",\n                styled=True,\n            )\n        if pushed_exps := result.get(\"success\"):\n            exps = join_exps(pushed_exps)\n            ui.write(f\"Pushed experiment {exps} to Git remote {remote!r}.\", styled=True)\n        if not uptodate_exps and not pushed_exps:\n            ui.write(\"No experiments to push.\")\n\n        if uploaded := result.get(\"uploaded\"):\n            stats = {\"uploaded\": uploaded}\n            ui.write(humanize.get_summary(stats.items()))\n\n        if project_url := result.get(\"url\"):\n            ui.rich_print(\n                \"View your experiments at\", project_url, style=\"yellow\", soft_wrap=True\n            )\n\n    def run(self):\n        from dvc.repo.experiments.push import UploadError\n\n        try:\n            result = 
self.repo.experiments.push(\n                self.args.git_remote,\n                self.args.experiment,\n                all_commits=self.args.all_commits,\n                rev=self.args.rev,\n                num=self.args.num,\n                force=self.args.force,\n                push_cache=self.args.push_cache,\n                dvc_remote=self.args.dvc_remote,\n                jobs=self.args.jobs,\n                run_cache=self.args.run_cache,\n            )\n        except UploadError as e:\n            self.log_result(e.result, self.args.git_remote)\n            raise\n\n        self.log_result(result, self.args.git_remote)\n        if not self.args.push_cache:\n            ui.write(\n                \"To push cached outputs\",\n                (\n                    \"for this experiment to DVC remote storage, \"\n                    \"re-run this command without '--no-cache'.\"\n                ),\n            )\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    from . 
import add_rev_selection_flags\n\n    EXPERIMENTS_PUSH_HELP = \"Push a local experiment to a Git remote.\"\n    experiments_push_parser = experiments_subparsers.add_parser(\n        \"push\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_PUSH_HELP, \"exp/push\"),\n        help=EXPERIMENTS_PUSH_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    add_rev_selection_flags(experiments_push_parser, \"Push\", True)\n    experiments_push_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        help=\"Replace experiment in the Git remote if it already exists.\",\n    )\n    experiments_push_parser.add_argument(\n        \"--no-cache\",\n        action=\"store_false\",\n        dest=\"push_cache\",\n        help=\"Do not push cached outputs for this experiment to DVC remote storage.\",\n    )\n    experiments_push_parser.add_argument(\n        \"-r\",\n        \"--remote\",\n        dest=\"dvc_remote\",\n        metavar=\"<name>\",\n        help=\"Name of the DVC remote to use when pushing cached outputs.\",\n    ).complete = completion.REMOTE\n    experiments_push_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        metavar=\"<number>\",\n        help=\"Number of jobs to run simultaneously when pushing to DVC remote storage.\",\n    )\n    experiments_push_parser.add_argument(\n        \"--run-cache\",\n        action=argparse.BooleanOptionalAction,\n        default=False,\n        help=\"Push run history for all stages.\",\n    )\n    experiments_push_parser.add_argument(\n        \"git_remote\",\n        help=\"Git remote name or Git URL.\",\n        metavar=\"<git_remote>\",\n    )\n    experiments_push_parser.add_argument(\n        \"experiment\",\n        nargs=\"*\",\n        default=None,\n        help=\"Experiments to push.\",\n        metavar=\"<experiment>\",\n    ).complete = completion.EXPERIMENT\n    
experiments_push_parser.set_defaults(func=CmdExperimentsPush)\n"
  },
  {
    "path": "dvc/commands/experiments/queue_worker.py",
    "content": "from dvc.cli.command import CmdBase\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdQueueWorker(CmdBase):\n    \"\"\"Run the exp queue worker.\"\"\"\n\n    def run(self):\n        self.repo.experiments.celery_queue.worker.start(\n            self.args.name, fsapp_clean=self.args.clean\n        )\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    QUEUE_WORKER_HELP = \"Run the exp queue worker.\"\n    parser = experiments_subparsers.add_parser(\n        \"queue-worker\",\n        parents=[parent_parser],\n        description=QUEUE_WORKER_HELP,\n        add_help=False,\n    )\n    parser.add_argument(\"name\", help=\"Celery worker name.\")\n    parser.add_argument(\n        \"--clean\",\n        action=\"store_true\",\n        help=\"Automatically cleanup celery broker on shutdown.\",\n    )\n    parser.set_defaults(func=CmdQueueWorker)\n"
  },
  {
    "path": "dvc/commands/experiments/remove.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsRemove(CmdBase):\n    def check_arguments(self):\n        if not any(\n            [\n                self.args.all_commits,\n                self.args.rev,\n                self.args.queue,\n            ]\n        ) ^ bool(self.args.experiment):\n            raise InvalidArgumentError(\n                \"Either provide an `experiment` argument, or use the \"\n                \"`--rev` or `--all-commits` or `--queue` flag.\"\n            )\n\n    def run(self):\n        from dvc.utils import humanize\n\n        self.check_arguments()\n\n        removed = self.repo.experiments.remove(\n            exp_names=self.args.experiment,\n            all_commits=self.args.all_commits,\n            rev=self.args.rev,\n            num=self.args.num,\n            queue=self.args.queue,\n            git_remote=self.args.git_remote,\n            keep=self.args.keep,\n        )\n        if removed:\n            ui.write(f\"Removed experiments: {humanize.join(map(repr, removed))}\")\n        else:\n            ui.write(\"No experiments to remove.\")\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    from . 
import add_keep_selection_flag, add_rev_selection_flags\n\n    EXPERIMENTS_REMOVE_HELP = \"Remove experiments.\"\n    experiments_remove_parser = experiments_subparsers.add_parser(\n        \"remove\",\n        aliases=[\"rm\"],\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_REMOVE_HELP, \"exp/remove\"),\n        help=EXPERIMENTS_REMOVE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remove_group = experiments_remove_parser.add_mutually_exclusive_group()\n    add_rev_selection_flags(experiments_remove_parser, \"Remove\", False)\n    add_keep_selection_flag(experiments_remove_parser)\n    remove_group.add_argument(\n        \"--queue\", action=\"store_true\", help=\"Remove all queued experiments.\"\n    )\n    remove_group.add_argument(\n        \"-g\",\n        \"--git-remote\",\n        metavar=\"<git_remote>\",\n        help=\"Name or URL of the Git remote to remove the experiment from\",\n    )\n    experiments_remove_parser.add_argument(\n        \"experiment\",\n        nargs=\"*\",\n        help=\"Experiments to remove.\",\n        metavar=\"<experiment>\",\n    )\n    experiments_remove_parser.set_defaults(func=CmdExperimentsRemove)\n"
  },
  {
    "path": "dvc/commands/experiments/rename.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsRename(CmdBase):\n    def run(self):\n        from dvc.utils import humanize\n\n        if not (self.args.experiment and self.args.name):\n            raise InvalidArgumentError(\n                \"An experiment to rename and a new experiment name are required.\"\n            )\n        renamed = self.repo.experiments.rename(\n            exp_name=self.args.experiment,\n            new_name=self.args.name,\n            git_remote=self.args.git_remote,\n            force=self.args.force,\n        )\n        if renamed:\n            ui.write(f\"Renamed experiments: {humanize.join(map(repr, renamed))}\")\n        else:\n            ui.write(\"No experiments to rename.\")\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXPERIMENTS_RENAME_HELP = \"Rename experiments.\"\n    experiments_rename_parser = experiments_subparsers.add_parser(\n        \"rename\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_RENAME_HELP, \"exp/rename\"),\n        help=EXPERIMENTS_RENAME_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    rename_group = experiments_rename_parser.add_mutually_exclusive_group()\n    rename_group.add_argument(\n        \"-g\",\n        \"--git-remote\",\n        metavar=\"<git_remote>\",\n        help=\"Name or URL of the Git remote to rename the experiment from\",\n    )\n    experiments_rename_parser.add_argument(\n        \"experiment\",\n        help=\"Experiment to rename.\",\n        nargs=\"?\",\n        metavar=\"<experiment>\",\n    )\n    experiments_rename_parser.add_argument(\n        \"name\",\n        help=\"New name for the experiment.\",\n        
metavar=\"<name>\",\n    )\n    experiments_rename_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Replace experiment if it already exists.\",\n    )\n    experiments_rename_parser.set_defaults(func=CmdExperimentsRename)\n"
  },
  {
    "path": "dvc/commands/experiments/run.py",
    "content": "import argparse\n\nfrom dvc.cli import formatter\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.repro import CmdRepro\nfrom dvc.commands.repro import add_arguments as add_repro_arguments\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsRun(CmdRepro):\n    def run(self):\n        self.repo.experiments.run(\n            name=self.args.name,\n            queue=self.args.queue,\n            run_all=self.args.run_all,\n            jobs=self.args.jobs,\n            params=self.args.set_param,\n            tmp_dir=self.args.tmp_dir,\n            copy_paths=self.args.copy_paths,\n            message=self.args.message,\n            no_hydra=self.args.no_hydra,\n            **self._common_kwargs,\n        )\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXPERIMENTS_RUN_HELP = \"Run an experiment.\"\n    experiments_run_parser = experiments_subparsers.add_parser(\n        \"run\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_RUN_HELP, \"exp/run\"),\n        help=EXPERIMENTS_RUN_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    _add_run_common(experiments_run_parser)\n    experiments_run_parser.set_defaults(func=CmdExperimentsRun)\n\n\ndef _add_run_common(parser):\n    \"\"\"Add common args for 'exp run'.\"\"\"\n    # inherit arguments from `dvc repro`\n    add_repro_arguments(parser)\n    parser.add_argument(\n        \"-n\",\n        \"--name\",\n        default=None,\n        help=(\n            \"Human-readable experiment name. 
If not specified, a name will \"\n            \"be auto-generated.\"\n        ),\n        metavar=\"<name>\",\n    )\n    parser.add_argument(\n        \"-S\",\n        \"--set-param\",\n        action=\"append\",\n        default=[],\n        help=\"Use the specified param value when reproducing pipelines.\",\n        metavar=\"[<filename>:]<param_name>=<param_value>\",\n    )\n    parser.add_argument(\n        \"--queue\",\n        action=\"store_true\",\n        default=False,\n        help=\"Stage this experiment in the run queue for future execution.\",\n    )\n    parser.add_argument(\n        \"--run-all\",\n        action=\"store_true\",\n        default=False,\n        help=\"Execute all experiments in the run queue. Implies --temp.\",\n    )\n    parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        default=1,\n        help=\"Run the specified number of experiments at a time in parallel.\",\n        metavar=\"<number>\",\n    )\n    parser.add_argument(\n        \"--temp\",\n        action=\"store_true\",\n        dest=\"tmp_dir\",\n        help=(\n            \"Run this experiment in a separate temporary directory instead of \"\n            \"your workspace.\"\n        ),\n    )\n    parser.add_argument(\n        \"-C\",\n        \"--copy-paths\",\n        action=\"append\",\n        default=[],\n        help=(\n            \"List of ignored or untracked paths to copy into the temp directory.\"\n            \" Only used if `--temp` or `--queue` is specified.\"\n        ),\n    )\n    parser.add_argument(\n        \"-m\",\n        \"--message\",\n        type=str,\n        default=None,\n        help=\"Custom commit message to use when committing the experiment.\",\n    )\n    parser.add_argument(\n        \"--no-hydra\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Disables automatically updating `params.yaml` with Hydra configuration. 
\"\n            \" You can still use `--set-param` to update individual params if needed.\"\n            \" Default is False.\"\n        ),\n    )\n    parser.add_argument(\"-M\", dest=\"message\", help=argparse.SUPPRESS)  # obsolete\n"
  },
  {
    "path": "dvc/commands/experiments/save.py",
    "content": "import argparse\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdExperimentsSave(CmdBase):\n    def run(self):\n        try:\n            ref = self.repo.experiments.save(\n                targets=self.args.targets,\n                name=self.args.name,\n                recursive=self.args.recursive,\n                force=self.args.force,\n                include_untracked=self.args.include_untracked,\n                message=self.args.message,\n            )\n        except DvcException:\n            logger.exception(\"failed to save experiment\")\n            return 1\n\n        if self.args.json:\n            ui.write_json({\"ref\": ref})\n        else:\n            name = self.repo.experiments.get_exact_name([ref])[ref]\n            ui.write(f\"Experiment has been saved as: {name}\")\n\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    EXPERIMENTS_SAVE_HELP = \"Save current workspace as an experiment.\"\n    save_parser = experiments_subparsers.add_parser(\n        \"save\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_SAVE_HELP, \"exp/save\"),\n        help=EXPERIMENTS_SAVE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    save_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n        help=(\"Limit DVC caching to these .dvc files and stage names.\"),\n    ).complete = completion.DVCFILES_AND_STAGE\n    save_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Cache subdirectories of the specified directory.\",\n    )\n    save_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        
default=False,\n        help=\"Replace experiment if it already exists.\",\n    )\n    save_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show output in JSON format.\",\n    )\n    save_parser.add_argument(\n        \"-n\",\n        \"--name\",\n        default=None,\n        help=(\n            \"Human-readable experiment name. If not specified, a name will \"\n            \"be auto-generated.\"\n        ),\n        metavar=\"<name>\",\n    )\n    save_parser.add_argument(\n        \"-I\",\n        \"--include-untracked\",\n        action=\"append\",\n        default=[],\n        help=\"List of untracked paths to include in the experiment.\",\n        metavar=\"<path>\",\n    )\n    save_parser.add_argument(\n        \"-m\",\n        \"--message\",\n        type=str,\n        default=None,\n        help=\"Custom commit message to use when committing the experiment.\",\n    )\n    save_parser.add_argument(\"-M\", dest=\"message\", help=argparse.SUPPRESS)  # obsolete\n    save_parser.set_defaults(func=CmdExperimentsSave)\n"
  },
  {
    "path": "dvc/commands/experiments/show.py",
    "content": "import argparse\nimport re\nfrom collections.abc import Collection, Iterable\nfrom datetime import date, datetime\nfrom typing import TYPE_CHECKING\n\nfrom funcy import lmap\n\nfrom dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.metrics import DEFAULT_PRECISION\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc.utils.serialize import encode_exception\n\nif TYPE_CHECKING:\n    from dvc.compare import TabularData\n    from dvc.ui import RichText\n\nFILL_VALUE = \"-\"\nFILL_VALUE_ERRORED = \"!\"\n\n\nlogger = logger.getChild(__name__)\n\n\nexperiment_types = {\n    \"branch_commit\": \"├──\",\n    \"branch_base\": \"└──\",\n    \"baseline\": \"\",\n}\n\n\ndef prepare_exp_id(kwargs) -> \"RichText\":\n    exp_name = kwargs[\"Experiment\"]\n    rev = kwargs[\"rev\"]\n    typ = kwargs.get(\"typ\", \"baseline\")\n\n    if typ == \"baseline\" or not exp_name:\n        text = ui.rich_text(exp_name or rev)\n    else:\n        text = ui.rich_text.assemble(rev, \" [\", (exp_name, \"bold\"), \"]\")\n\n    parent = kwargs.get(\"parent\")\n    suff = f\" ({parent})\" if parent else \"\"\n    text.append(suff)\n\n    tree = experiment_types[typ]\n    pref = f\"{tree} \" if tree else \"\"\n    return ui.rich_text(pref) + text\n\n\ndef baseline_styler(typ):\n    return {\"style\": \"bold\"} if typ == \"baseline\" else {}\n\n\ndef show_experiments(\n    td: \"TabularData\",\n    headers: dict[str, Iterable[str]],\n    keep: Collection[str] = (),\n    drop: Collection[str] = (),\n    pager=True,\n    csv=False,\n    markdown=False,\n    **kwargs,\n):\n    if keep:\n        keep_re = re.compile(\"|\".join(keep))\n        td.protect(*(col for col in td.keys() if keep_re.match(col)))  # noqa: SIM118\n\n    for col in (\"State\", \"Executor\"):\n        if td.is_empty(col):\n            td.drop(col)\n\n    row_styles = lmap(baseline_styler, 
td.column(\"typ\"))\n\n    if not csv:\n        merge_headers = [\"Experiment\", \"rev\", \"typ\", \"parent\"]\n        td.column(\"Experiment\")[:] = map(prepare_exp_id, td.as_dict(merge_headers))\n        td.drop(*merge_headers[1:])\n\n    styles = {\n        \"Experiment\": {\"no_wrap\": True, \"header_style\": \"black on grey93\"},\n        \"Created\": {\"header_style\": \"black on grey93\"},\n        \"State\": {\"header_style\": \"black on grey93\"},\n        \"Executor\": {\"header_style\": \"black on grey93\"},\n    }\n    header_bg_colors = {\n        \"metrics\": \"cornsilk1\",\n        \"params\": \"light_cyan1\",\n        \"deps\": \"plum2\",\n    }\n    styles.update(\n        {\n            header: {\n                \"justify\": \"right\" if typ == \"metrics\" else \"left\",\n                \"header_style\": f\"black on {header_bg_colors[typ]}\",\n                \"collapse\": idx != 0,\n                \"no_wrap\": typ == \"metrics\",\n            }\n            for typ, hs in headers.items()\n            for idx, header in enumerate(hs)\n        }\n    )\n\n    if kwargs.get(\"only_changed\", False):\n        td.drop_duplicates(\"cols\", ignore_empty=False)\n\n    cols_to_drop = set()\n    if drop:\n        drop_re = re.compile(\"|\".join(drop))\n        cols_to_drop = {col for col in td.keys() if drop_re.match(col)}  # noqa: SIM118\n    td.drop(*cols_to_drop)\n\n    td.render(\n        pager=pager,\n        borders=\"horizontals\",\n        rich_table=True,\n        header_styles=styles,\n        row_styles=row_styles,\n        csv=csv,\n        markdown=markdown,\n    )\n\n\ndef _normalize_headers(names, count):\n    return [\n        name if count[name] == 1 else f\"{path}:{name}\"\n        for path in names\n        for name in names[path]\n    ]\n\n\ndef _format_json(item):\n    if isinstance(item, (date, datetime)):\n        return item.isoformat()\n    return encode_exception(item)\n\n\nclass CmdExperimentsShow(CmdBase):\n    def 
run(self):\n        from dvc.repo.experiments.show import tabulate\n\n        try:\n            exps = self.repo.experiments.show(\n                all_branches=self.args.all_branches,\n                all_tags=self.args.all_tags,\n                all_commits=self.args.all_commits,\n                hide_queued=self.args.hide_queued,\n                hide_failed=self.args.hide_failed,\n                hide_workspace=self.args.hide_workspace,\n                revs=self.args.rev,\n                num=self.args.num,\n                sha_only=self.args.sha,\n                param_deps=self.args.param_deps,\n                fetch_running=self.args.fetch_running,\n                force=self.args.force,\n            )\n        except DvcException:\n            logger.exception(\"failed to show experiments\")\n            return 1\n\n        if self.args.json:\n            ui.write_json([exp.dumpd() for exp in exps], default=_format_json)\n        else:\n            precision = (\n                self.args.precision or None if self.args.csv else DEFAULT_PRECISION\n            )\n            fill_value = \"\" if self.args.csv else FILL_VALUE\n            iso = self.args.csv\n            td, headers = tabulate(\n                exps,\n                precision=precision,\n                fill_value=fill_value,\n                iso=iso,\n                sort_by=self.args.sort_by,\n                sort_order=self.args.sort_order,\n            )\n\n            show_experiments(\n                td,\n                headers,\n                keep=self.args.keep,\n                drop=self.args.drop,\n                sort_by=self.args.sort_by,\n                sort_order=self.args.sort_order,\n                pager=not self.args.no_pager,\n                csv=self.args.csv,\n                markdown=self.args.markdown,\n                only_changed=self.args.only_changed,\n            )\n        return 0\n\n\ndef add_parser(experiments_subparsers, parent_parser):\n    from . 
import add_rev_selection_flags\n\n    EXPERIMENTS_SHOW_HELP = \"Print experiments.\"\n    experiments_show_parser = experiments_subparsers.add_parser(\n        \"show\",\n        parents=[parent_parser],\n        description=append_doc_link(EXPERIMENTS_SHOW_HELP, \"exp/show\"),\n        help=EXPERIMENTS_SHOW_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    add_rev_selection_flags(experiments_show_parser, \"Show\")\n    experiments_show_parser.add_argument(\n        \"-a\",\n        \"--all-branches\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show experiments derived from the tip of all Git branches.\",\n    )\n    experiments_show_parser.add_argument(\n        \"-T\",\n        \"--all-tags\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show experiments derived from all Git tags.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--no-pager\",\n        action=\"store_true\",\n        default=False,\n        help=\"Do not pipe output into a pager.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--only-changed\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Only show metrics/params with values varying \"\n            \"across the selected experiments.\"\n        ),\n    )\n    experiments_show_parser.add_argument(\n        \"--drop\",\n        action=\"append\",\n        default=[],\n        help=\"Remove the columns matching the specified regex pattern.\",\n        metavar=\"<regex_pattern>\",\n    )\n    experiments_show_parser.add_argument(\n        \"--keep\",\n        action=\"append\",\n        default=[],\n        help=\"Preserve the columns matching the specified regex pattern.\",\n        metavar=\"<regex_pattern>\",\n    )\n    experiments_show_parser.add_argument(\n        \"--param-deps\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show only params that are stage 
dependencies.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--sort-by\",\n        help=\"Sort related experiments by the specified metric or param.\",\n        metavar=\"<metric/param>\",\n    )\n    experiments_show_parser.add_argument(\n        \"--sort-order\",\n        help=\"Sort order to use with --sort-by. Defaults to ascending ('asc').\",\n        choices=(\"asc\", \"desc\"),\n        default=\"asc\",\n    )\n    experiments_show_parser.add_argument(\n        \"--sha\",\n        action=\"store_true\",\n        default=False,\n        help=\"Always show git commit SHAs instead of branch/tag names.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--hide-failed\",\n        action=\"store_true\",\n        default=False,\n        help=\"Hide failed experiments in the table.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--hide-queued\",\n        action=\"store_true\",\n        default=False,\n        help=\"Hide queued experiments in the table.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--hide-workspace\",\n        action=\"store_true\",\n        default=False,\n        help=\"Hide workspace row in the table.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Print output in JSON format instead of a human-readable table.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--csv\",\n        action=\"store_true\",\n        default=False,\n        help=\"Print output in csv format instead of a human-readable table.\",\n    )\n    experiments_show_parser.add_argument(\n        \"--md\",\n        action=\"store_true\",\n        default=False,\n        dest=\"markdown\",\n        help=\"Show tabulated output in the Markdown format (GFM).\",\n    )\n    experiments_show_parser.add_argument(\n        \"--precision\",\n        type=int,\n        help=(\n            \"Round metrics/params to `n` 
digits precision after the decimal \"\n            f\"point. Rounds to {DEFAULT_PRECISION} digits by default.\"\n        ),\n        metavar=\"<n>\",\n    )\n    experiments_show_parser.add_argument(\n        \"--no-fetch\",\n        dest=\"fetch_running\",\n        action=\"store_false\",\n        help=argparse.SUPPRESS,\n    )\n    experiments_show_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        help=\"Force re-collection of experiments instead of loading from exp cache.\",\n    )\n    experiments_show_parser.set_defaults(func=CmdExperimentsShow)\n"
  },
  {
    "path": "dvc/commands/freeze.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdFreezeBase(CmdBase):\n    def _run(self, func, name):\n        ret = 0\n        for target in self.args.targets:\n            try:\n                func(target)\n            except DvcException:\n                logger.exception(\"failed to %s '%s'\", name, target)\n                ret = 1\n        return ret\n\n\nclass CmdFreeze(CmdFreezeBase):\n    def run(self):\n        return self._run(self.repo.freeze, \"freeze\")\n\n\nclass CmdUnfreeze(CmdFreezeBase):\n    def run(self):\n        return self._run(self.repo.unfreeze, \"unfreeze\")\n\n\ndef add_parser(subparsers, parent_parser):\n    FREEZE_HELP = \"Freeze stages or .dvc files.\"\n    freeze_parser = subparsers.add_parser(\n        \"freeze\",\n        parents=[parent_parser],\n        description=append_doc_link(FREEZE_HELP, \"freeze\"),\n        help=FREEZE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    freeze_parser.add_argument(\n        \"targets\", nargs=\"+\", help=\"Stages or .dvc files to freeze\"\n    ).complete = completion.DVC_FILE\n    freeze_parser.set_defaults(func=CmdFreeze)\n\n    UNFREEZE_HELP = \"Unfreeze stages or .dvc files.\"\n    unfreeze_parser = subparsers.add_parser(\n        \"unfreeze\",\n        parents=[parent_parser],\n        description=append_doc_link(UNFREEZE_HELP, \"unfreeze\"),\n        help=UNFREEZE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    unfreeze_parser.add_argument(\n        \"targets\", nargs=\"+\", help=\"Stages or .dvc files to unfreeze\"\n    ).complete = completion.DVC_FILE\n    unfreeze_parser.set_defaults(func=CmdUnfreeze)\n"
  },
  {
    "path": "dvc/commands/gc.py",
    "content": "import os\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdGC(CmdBase):\n    def run(self):  # noqa: C901, PLR0912\n        from dvc.repo.gc import _validate_args\n\n        _validate_args(\n            all_branches=self.args.all_branches,\n            all_tags=self.args.all_tags,\n            all_commits=self.args.all_commits,\n            all_experiments=self.args.all_experiments,\n            commit_date=self.args.commit_date,\n            workspace=self.args.workspace,\n            rev=self.args.rev,\n            num=self.args.num,\n            cloud=self.args.cloud,\n            not_in_remote=self.args.not_in_remote,\n        )\n\n        # Don't prompt during dry run\n        if self.args.dry:\n            self.args.force = True\n\n        if self.args.rev:\n            self.args.num = self.args.num or 1\n\n        msg = \"This will remove all cache except items used in \"\n\n        msg += \"the workspace\"\n        if self.args.all_commits:\n            msg += \" and all git commits\"\n        else:\n            if self.args.all_branches and self.args.all_tags:\n                if self.args.num:\n                    msg += f\" and last {self.args.num} commits from all git branches\"\n                    msg += \" and all git tags\"\n                else:\n                    msg += \" and all git branches and tags\"\n            elif self.args.all_branches:\n                if self.args.num:\n                    msg += f\" and last {self.args.num} commits from all git branches\"\n                else:\n                    msg += \" and all git branches\"\n            elif self.args.all_tags:\n                msg += \" and all git tags\"\n            if self.args.commit_date:\n                msg += f\" and all git commits before date {self.args.commit_date}\"\n      
      if self.args.rev:\n                msg += f\" and last {self.args.num} commits from {self.args.rev}\"\n\n        if self.args.all_experiments:\n            msg += \" and all experiments\"\n\n        if self.args.not_in_remote:\n            msg += \" that are not present in the DVC remote\"\n\n        if self.args.repos:\n            msg += \" of the current and the following repos:\"\n\n            for repo_path in self.args.repos:\n                msg += f\"\\n  - {os.path.abspath(repo_path)}\"\n        else:\n            msg += \" of the current repo.\"\n\n        logger.warning(msg)\n\n        msg = \"Are you sure you want to proceed?\"\n        if not self.args.force and not ui.confirm(msg):\n            return 1\n\n        self.repo.gc(\n            all_branches=self.args.all_branches,\n            all_tags=self.args.all_tags,\n            all_commits=self.args.all_commits,\n            all_experiments=self.args.all_experiments,\n            commit_date=self.args.commit_date,\n            cloud=self.args.cloud,\n            remote=self.args.remote,\n            force=self.args.force,\n            jobs=self.args.jobs,\n            repos=self.args.repos,\n            workspace=self.args.workspace,\n            rev=self.args.rev,\n            num=self.args.num,\n            not_in_remote=self.args.not_in_remote,\n            dry=self.args.dry,\n            skip_failed=self.args.skip_failed,\n        )\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    GC_HELP = \"Garbage collect unused objects from cache or remote storage.\"\n    GC_DESCRIPTION = (\n        \"Removes all files in the cache or a remote which are not in\\n\"\n        \"use by the specified Git revisions (defaults to just HEAD).\"\n    )\n    gc_parser = subparsers.add_parser(\n        \"gc\",\n        parents=[parent_parser],\n        description=append_doc_link(GC_DESCRIPTION, \"gc\"),\n        help=GC_HELP,\n        
formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    gc_parser.add_argument(\n        \"-w\",\n        \"--workspace\",\n        action=\"store_true\",\n        default=False,\n        help=\"Keep data files used in the current workspace.\",\n    )\n    gc_parser.add_argument(\n        \"--rev\",\n        type=str,\n        default=None,\n        help=\"Keep data files used in the specified <commit>.\",\n        metavar=\"<commit>\",\n    )\n    gc_parser.add_argument(\n        \"-n\",\n        \"--num\",\n        type=int,\n        dest=\"num\",\n        metavar=\"<num>\",\n        help=(\n            \"Keep data files used in the last `num` commits \"\n            \"starting from each selected revision root. \"\n            \"Can be used with `--rev` and `--all-branches`. \"\n            \"Defaults to `1`.\"\n        ),\n    )\n    gc_parser.add_argument(\n        \"-a\",\n        \"--all-branches\",\n        action=\"store_true\",\n        default=False,\n        help=\"Keep data files for the tips of all Git branches.\",\n    )\n    gc_parser.add_argument(\n        \"-T\",\n        \"--all-tags\",\n        action=\"store_true\",\n        default=False,\n        help=\"Keep data files for all Git tags.\",\n    )\n    gc_parser.add_argument(\n        \"-A\",\n        \"--all-commits\",\n        action=\"store_true\",\n        default=False,\n        help=\"Keep data files for all Git commits.\",\n    )\n    gc_parser.add_argument(\n        \"--date\",\n        type=str,\n        dest=\"commit_date\",\n        metavar=\"<yyyy-mm-dd>\",\n        default=None,\n        help=(\n            \"Keep cached data referenced in the commits after ( inclusive )\"\n            \" a certain time. 
Date must match the extended ISO 8601 format \"\n            \"(yyyy-mm-dd).\"\n        ),\n    )\n    gc_parser.add_argument(\n        \"--all-experiments\",\n        action=\"store_true\",\n        default=False,\n        help=\"Keep data files for all experiments.\",\n    )\n    gc_parser.add_argument(\n        \"--not-in-remote\",\n        action=\"store_true\",\n        default=False,\n        help=\"Keep data files that are not present in the remote.\",\n    )\n    gc_parser.add_argument(\n        \"-c\",\n        \"--cloud\",\n        action=\"store_true\",\n        default=False,\n        help=\"Collect garbage in remote storage in addition to local cache.\",\n    )\n    gc_parser.add_argument(\n        \"-r\",\n        \"--remote\",\n        help=\"Remote storage to collect garbage in\",\n        metavar=\"<name>\",\n    ).complete = completion.REMOTE\n    gc_parser.add_argument(\n        \"--skip-failed\",\n        action=\"store_true\",\n        default=False,\n        help=\"Skip revisions that fail when collected.\",\n    )\n    gc_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Force garbage collection - automatically agree to all prompts.\",\n    )\n    gc_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        help=(\n            \"Number of jobs to run simultaneously. \"\n            \"The default value is 4 * cpu_count(). \"\n        ),\n        metavar=\"<number>\",\n    )\n    gc_parser.add_argument(\n        \"-p\",\n        \"--projects\",\n        dest=\"repos\",\n        type=str,\n        nargs=\"*\",\n        help=(\n            \"Keep data files required by these projects \"\n            \"in addition to the current one. 
\"\n            \"Useful if you share a single cache across repos.\"\n        ),\n        metavar=\"<paths>\",\n    )\n    gc_parser.add_argument(\n        \"--dry\",\n        action=\"store_true\",\n        default=False,\n        help=(\"Only print what would get removed without actually removing.\"),\n    )\n    gc_parser.set_defaults(func=CmdGC)\n"
  },
  {
    "path": "dvc/commands/get.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdGet(CmdBaseNoRepo):\n    def _show_url(self):\n        from dvc.api import get_url\n        from dvc.ui import ui\n\n        url = get_url(\n            self.args.path,\n            repo=self.args.url,\n            rev=self.args.rev,\n            remote=self.args.remote,\n            remote_config=self.args.remote_config,\n        )\n        ui.write(url, force=True)\n\n        return 0\n\n    def run(self):\n        if self.args.show_url:\n            return self._show_url()\n\n        return self._get_file_from_repo()\n\n    def _get_file_from_repo(self):\n        from dvc.repo import Repo\n        from dvc.scm import CloneError\n\n        try:\n            Repo.get(\n                self.args.url,\n                path=self.args.path,\n                out=self.args.out,\n                rev=self.args.rev,\n                jobs=self.args.jobs,\n                force=self.args.force,\n                config=self.args.config,\n                remote=self.args.remote,\n                remote_config=self.args.remote_config,\n            )\n            return 0\n        except CloneError:\n            logger.exception(\"failed to get '%s'\", self.args.path)\n            return 1\n        except DvcException:\n            logger.exception(\n                \"failed to get '%s' from '%s'\", self.args.path, self.args.url\n            )\n            return 1\n\n\ndef add_parser(subparsers, parent_parser):\n    GET_HELP = \"Download file or directory tracked by DVC or by Git.\"\n    get_parser = subparsers.add_parser(\n        \"get\",\n        parents=[parent_parser],\n        description=append_doc_link(GET_HELP, \"get\"),\n        help=GET_HELP,\n        
formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    get_parser.add_argument(\n        \"url\", help=\"Location of DVC or Git repository to download from\"\n    )\n    get_parser.add_argument(\n        \"path\", help=\"Path to a file or directory within the repository\"\n    ).complete = completion.FILE\n    get_parser.add_argument(\n        \"-o\",\n        \"--out\",\n        nargs=\"?\",\n        help=\"Destination path to download files to\",\n        metavar=\"<path>\",\n    ).complete = completion.DIR\n    get_parser.add_argument(\n        \"--rev\",\n        nargs=\"?\",\n        help=\"Git revision (e.g. SHA, branch, tag)\",\n        metavar=\"<commit>\",\n    )\n    get_parser.add_argument(\n        \"--show-url\",\n        action=\"store_true\",\n        help=(\n            \"Print the storage location (URL) the target data would be \"\n            \"downloaded from, and exit.\"\n        ),\n    )\n    get_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        help=(\n            \"Number of jobs to run simultaneously. \"\n            \"The default value is 4 * cpu_count(). 
\"\n        ),\n        metavar=\"<number>\",\n    )\n    get_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Override local file or folder if exists.\",\n    )\n    get_parser.add_argument(\n        \"--config\",\n        type=str,\n        help=(\n            \"Path to a config file that will be merged with the config \"\n            \"in the target repository.\"\n        ),\n    )\n    get_parser.add_argument(\n        \"--remote\",\n        type=str,\n        help=\"Remote name to set as a default in the target repository.\",\n    ).complete = completion.REMOTE\n    get_parser.add_argument(\n        \"--remote-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=(\n            \"Remote config options to merge with a remote's config (default or one \"\n            \"specified by '--remote') in the target repository.\"\n        ),\n    )\n    get_parser.set_defaults(func=CmdGet)\n"
  },
  {
    "path": "dvc/commands/get_url.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdGetUrl(CmdBaseNoRepo):\n    def run(self):\n        from dvc.config import Config\n        from dvc.repo import Repo\n\n        try:\n            Repo.get_url(\n                self.args.url,\n                out=self.args.out,\n                jobs=self.args.jobs,\n                force=self.args.force,\n                fs_config=self.args.fs_config,\n                config=Config.from_cwd(),\n            )\n            return 0\n        except DvcException:\n            logger.exception(\"failed to get '%s'\", self.args.url)\n            return 1\n\n\ndef add_parser(subparsers, parent_parser):\n    GET_HELP = \"Download or copy files from URL.\"\n    get_parser = subparsers.add_parser(\n        \"get-url\",\n        parents=[parent_parser],\n        description=append_doc_link(GET_HELP, \"get-url\"),\n        help=GET_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    get_parser.add_argument(\n        \"url\", help=\"See `dvc import-url -h` for full list of supported URLs.\"\n    )\n    get_parser.add_argument(\n        \"out\", nargs=\"?\", help=\"Destination path to put data to.\"\n    ).complete = completion.DIR\n    get_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        help=(\n            \"Number of jobs to run simultaneously. \"\n            \"The default value is 4 * cpu_count(). 
\"\n        ),\n        metavar=\"<number>\",\n    )\n    get_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Override local file or folder if exists.\",\n    )\n    get_parser.add_argument(\n        \"--fs-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=\"Config options for the target url.\",\n    )\n    get_parser.set_defaults(func=CmdGetUrl)\n"
  },
  {
    "path": "dvc/commands/git_hook.py",
    "content": "import os\n\nfrom dvc.cli import formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.exceptions import NotDvcRepoError\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdHookBase(CmdBaseNoRepo):\n    def run(self):\n        from dvc.repo import Repo\n\n        try:\n            repo = Repo()\n            repo.close()\n        except NotDvcRepoError:\n            return 0\n\n        return self._run()\n\n    def _run(self):\n        raise NotImplementedError\n\n\nclass CmdPreCommit(CmdHookBase):\n    def _run(self):\n        from dvc.cli import main\n\n        return main([\"status\"])\n\n\nclass CmdPostCheckout(CmdHookBase):\n    def _run(self):\n        # when we are running from pre-commit tool, it doesn't provide CLI\n        # flags, but instead provides respective env vars that we could use.\n        flag = os.environ.get(\"PRE_COMMIT_CHECKOUT_TYPE\")\n        if flag is None and len(self.args.args) >= 3:\n            # see https://git-scm.com/docs/githooks#_post_checkout\n            flag = self.args.args[2]\n\n        # checking out some reference and not specific file.\n        if flag != \"1\":\n            return 0\n\n        # make sure we are not in the middle of a rebase/merge, so we\n        # don't accidentally break it with an unsuccessful checkout.\n        # Note that git hooks are always running in repo root.\n        if os.path.isdir(os.path.join(\".git\", \"rebase-merge\")):\n            return 0\n\n        from dvc.cli import main\n\n        return main([\"checkout\"])\n\n\nclass CmdPrePush(CmdHookBase):\n    def _run(self):\n        from dvc.cli import main\n\n        return main([\"push\"])\n\n\nclass CmdMergeDriver(CmdHookBase):\n    def _run(self):\n        from dvc.dvcfile import load_file\n        from dvc.repo import Repo\n\n        dvc = Repo()\n\n        try:\n            ancestor = load_file(dvc, self.args.ancestor, verify=False)\n            our = load_file(dvc, self.args.our, 
verify=False)\n            their = load_file(dvc, self.args.their, verify=False)\n\n            our.merge(ancestor, their, allowed=[\"add\", \"remove\", \"change\"])\n\n            return 0\n        finally:\n            dvc.close()\n\n\ndef add_parser(subparsers, parent_parser):\n    GIT_HOOK_HELP = \"Run GIT hook.\"\n\n    git_hook_parser = subparsers.add_parser(\n        \"git-hook\",\n        parents=[parent_parser],\n        description=GIT_HOOK_HELP,\n        add_help=False,\n    )\n\n    git_hook_subparsers = git_hook_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc git-hook CMD --help` for command-specific help.\",\n        required=True,\n    )\n\n    PRE_COMMIT_HELP = \"Run pre-commit GIT hook.\"\n    pre_commit_parser = git_hook_subparsers.add_parser(\n        \"pre-commit\",\n        parents=[parent_parser],\n        description=PRE_COMMIT_HELP,\n        help=PRE_COMMIT_HELP,\n    )\n    pre_commit_parser.add_argument(\n        \"args\", nargs=\"*\", help=\"Arguments passed by GIT or pre-commit tool.\"\n    )\n    pre_commit_parser.set_defaults(func=CmdPreCommit)\n\n    POST_CHECKOUT_HELP = \"Run post-checkout GIT hook.\"\n    post_checkout_parser = git_hook_subparsers.add_parser(\n        \"post-checkout\",\n        parents=[parent_parser],\n        description=POST_CHECKOUT_HELP,\n        help=POST_CHECKOUT_HELP,\n    )\n    post_checkout_parser.add_argument(\n        \"args\", nargs=\"*\", help=\"Arguments passed by GIT or pre-commit tool.\"\n    )\n    post_checkout_parser.set_defaults(func=CmdPostCheckout)\n\n    PRE_PUSH_HELP = \"Run pre-push GIT hook.\"\n    pre_push_parser = git_hook_subparsers.add_parser(\n        \"pre-push\",\n        parents=[parent_parser],\n        description=PRE_PUSH_HELP,\n        help=PRE_PUSH_HELP,\n    )\n    pre_push_parser.add_argument(\n        \"args\", nargs=\"*\", help=\"Arguments passed by GIT or pre-commit tool.\"\n    )\n    pre_push_parser.set_defaults(func=CmdPrePush)\n\n    
MERGE_DRIVER_HELP = \"Run GIT merge driver.\"\n    merge_driver_parser = git_hook_subparsers.add_parser(\n        \"merge-driver\",\n        parents=[parent_parser],\n        description=MERGE_DRIVER_HELP,\n        help=MERGE_DRIVER_HELP,\n        formatter_class=formatter.HelpFormatter,\n    )\n    merge_driver_parser.add_argument(\n        \"--ancestor\",\n        required=True,\n        help=\"Ancestor's version of the conflicting file.\",\n    )\n    merge_driver_parser.add_argument(\n        \"--our\", required=True, help=\"Current version of the conflicting file.\"\n    )\n    merge_driver_parser.add_argument(\n        \"--their\",\n        required=True,\n        help=\"Other branch's version of the conflicting file.\",\n    )\n    merge_driver_parser.set_defaults(func=CmdMergeDriver)\n"
  },
  {
    "path": "dvc/commands/imp.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdImport(CmdBase):\n    def run(self):\n        from dvc.scm import CloneError\n\n        try:\n            self.repo.imp(\n                self.args.url,\n                self.args.path,\n                out=self.args.out,\n                rev=self.args.rev,\n                no_exec=self.args.no_exec,\n                no_download=self.args.no_download,\n                jobs=self.args.jobs,\n                config=self.args.config,\n                remote=self.args.remote,\n                remote_config=self.args.remote_config,\n                force=self.args.force,\n            )\n        except CloneError:\n            logger.exception(\"failed to import '%s'\", self.args.path)\n            return 1\n        except DvcException:\n            logger.exception(\n                \"failed to import '%s' from '%s'.\",\n                self.args.path,\n                self.args.url,\n            )\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    IMPORT_HELP = (\n        \"Download file or directory tracked by DVC or by Git \"\n        \"into the workspace, and track it.\"\n    )\n\n    import_parser = subparsers.add_parser(\n        \"import\",\n        parents=[parent_parser],\n        description=append_doc_link(IMPORT_HELP, \"import\"),\n        help=IMPORT_HELP,\n        formatter_class=formatter.RawTextHelpFormatter,\n    )\n    import_parser.add_argument(\n        \"url\", help=\"Location of DVC or Git repository to download from\"\n    )\n    import_parser.add_argument(\n        \"path\", help=\"Path to a file or directory within the repository\"\n    ).complete = completion.FILE\n    import_parser.add_argument(\n        \"-o\",\n        \"--out\",\n     
   nargs=\"?\",\n        help=\"Destination path to download files to\",\n        metavar=\"<path>\",\n    ).complete = completion.DIR\n    import_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Override destination file or folder if exists.\",\n    )\n    import_parser.add_argument(\n        \"--rev\",\n        nargs=\"?\",\n        help=\"Git revision (e.g. SHA, branch, tag)\",\n        metavar=\"<commit>\",\n    )\n    no_download_exec_group = import_parser.add_mutually_exclusive_group()\n    no_download_exec_group.add_argument(\n        \"--no-exec\",\n        action=\"store_true\",\n        default=False,\n        help=\"Only create .dvc file without actually importing target data.\",\n    )\n    no_download_exec_group.add_argument(\n        \"--no-download\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Create .dvc file including target data hash value(s)\"\n            \" but do not actually download the file(s).\"\n        ),\n    )\n    import_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        help=(\n            \"Number of jobs to run simultaneously. \"\n            \"The default value is 4 * cpu_count(). 
\"\n        ),\n        metavar=\"<number>\",\n    )\n    import_parser.add_argument(\n        \"--config\",\n        type=str,\n        help=(\n            \"Path to a config file that will be merged with the config \"\n            \"in the target repository.\"\n        ),\n    )\n    import_parser.add_argument(\n        \"--remote\",\n        type=str,\n        help=\"Remote name to set as a default in the target repository.\",\n    ).complete = completion.REMOTE\n    import_parser.add_argument(\n        \"--remote-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=(\n            \"Remote config options to merge with a remote's config (default or one \"\n            \"specified by '--remote') in the target repository.\"\n        ),\n    )\n    import_parser.set_defaults(func=CmdImport)\n"
  },
  {
    "path": "dvc/commands/imp_db.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase, CmdBaseNoRepo\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdTestDb(CmdBaseNoRepo):\n    def run(self):\n        from dvc.config import Config\n        from dvc.database import client\n        from dvc.exceptions import DvcException\n\n        connection = self.args.conn\n        db_config = Config.from_cwd().get(\"db\", {})\n        if connection not in db_config:\n            raise DvcException(f\"connection {connection} not found in config\")\n\n        config = db_config.get(connection, {})\n        if self.args.url:\n            config[\"url\"] = self.args.url\n        if self.args.username:\n            config[\"username\"] = self.args.username\n        if self.args.password:\n            config[\"password\"] = self.args.password\n        with client(config) as db:\n            ui.write(f\"Testing with {db}\", styled=True)\n            db.test_connection()\n        ui.write(\"Connection successful\", styled=True)\n\n\nclass CmdImportDb(CmdBase):\n    def run(self):\n        self.repo.imp_db(\n            sql=self.args.sql,\n            table=self.args.table,\n            output_format=self.args.output_format,\n            out=self.args.out,\n            force=self.args.force,\n            connection=self.args.conn,\n        )\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    IMPORT_HELP = \"Snapshot a table or a SQL query result to a CSV/JSON format\"\n    import_parser = subparsers.add_parser(\n        \"import-db\",\n        parents=[parent_parser],\n        description=append_doc_link(IMPORT_HELP, \"import-db\"),\n        help=IMPORT_HELP,\n        formatter_class=formatter.RawTextHelpFormatter,\n    )\n    group = import_parser.add_mutually_exclusive_group(required=True)\n    group.add_argument(\"--sql\", help=\"SQL query to snapshot\")\n    
group.add_argument(\"--table\", help=\"Table to snapshot\")\n    import_parser.add_argument(\n        \"--output-format\",\n        default=\"csv\",\n        const=\"csv\",\n        nargs=\"?\",\n        choices=[\"csv\", \"json\"],\n        help=\"Export format\",\n    )\n    import_parser.add_argument(\n        \"-o\",\n        \"--out\",\n        nargs=\"?\",\n        help=\"Destination path to download files to\",\n        metavar=\"<path>\",\n    ).complete = completion.FILE\n    import_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Override destination file or folder if exists.\",\n    )\n    import_parser.add_argument(\n        \"--conn\",\n        required=True,\n        help=\"Database connection to use, needs to be set in config\",\n    )\n\n    import_parser.set_defaults(func=CmdImportDb)\n\n    TEST_DB_HELP = \"Test the database connection\"\n    test_db_parser = subparsers.add_parser(\n        \"test-db\",\n        parents=[parent_parser],\n        description=append_doc_link(TEST_DB_HELP, \"test-db\"),\n        add_help=False,\n    )\n    test_db_parser.add_argument(\"--conn\", required=True)\n    test_db_parser.add_argument(\"--url\")\n    test_db_parser.add_argument(\"--password\")\n    test_db_parser.add_argument(\"--username\")\n    test_db_parser.set_defaults(func=CmdTestDb)\n"
  },
  {
    "path": "dvc/commands/imp_url.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdImportUrl(CmdBase):\n    def run(self):\n        try:\n            self.repo.imp_url(\n                self.args.url,\n                out=self.args.out,\n                no_exec=self.args.no_exec,\n                no_download=self.args.no_download,\n                remote=self.args.remote,\n                to_remote=self.args.to_remote,\n                jobs=self.args.jobs,\n                force=self.args.force,\n                version_aware=self.args.version_aware,\n                fs_config=self.args.fs_config,\n            )\n        except DvcException:\n            logger.exception(\n                (\n                    \"failed to import %s. You could also try downloading \"\n                    \"it manually, and adding it with `dvc add`.\"\n                ),\n                self.args.url,\n            )\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    IMPORT_HELP = \"Download or copy file from URL and take it under DVC control.\"\n\n    import_parser = subparsers.add_parser(\n        \"import-url\",\n        parents=[parent_parser],\n        description=append_doc_link(IMPORT_HELP, \"import-url\"),\n        help=IMPORT_HELP,\n        formatter_class=formatter.RawTextHelpFormatter,\n    )\n    import_parser.add_argument(\n        \"url\",\n        help=(\n            \"Location of the data to download. 
Supported URLs:\\n\"\n            \"/absolute/path/to/file/or/dir\\n\"\n            \"relative/path/to/file/or/dir\\n\"\n            \"C:\\\\\\\\path\\\\to\\\\file\\\\or\\\\dir\\n\"\n            \"https://example.com/path/to/file\\n\"\n            \"s3://bucket/key/path\\n\"\n            \"gs://bucket/path/to/file/or/dir\\n\"\n            \"hdfs://example.com/path/to/file\\n\"\n            \"ssh://example.com/absolute/path/to/file/or/dir\\n\"\n            \"remote://remote_name/path/to/file/or/dir (see `dvc remote`)\"\n        ),\n    )\n    import_parser.add_argument(\n        \"out\", nargs=\"?\", help=\"Destination path to put files to.\"\n    ).complete = completion.DIR\n    import_parser.add_argument(\n        \"--to-remote\",\n        action=\"store_true\",\n        default=False,\n        help=\"Download it directly to the remote\",\n    )\n    import_parser.add_argument(\n        \"-r\",\n        \"--remote\",\n        help=\"Remote storage to download to\",\n        metavar=\"<name>\",\n    ).complete = completion.REMOTE\n    no_download_exec_group = import_parser.add_mutually_exclusive_group()\n    no_download_exec_group.add_argument(\n        \"--no-exec\",\n        action=\"store_true\",\n        default=False,\n        help=\"Only create .dvc file without actually importing target data.\",\n    )\n    no_download_exec_group.add_argument(\n        \"--no-download\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Create .dvc file including target data hash value(s)\"\n            \" but do not actually download the file(s).\"\n        ),\n    )\n    import_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        help=(\n            \"Number of jobs to run simultaneously. \"\n            \"The default value is 4 * cpu_count(). 
\"\n        ),\n        metavar=\"<number>\",\n    )\n    import_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Override local file or folder if exists.\",\n    )\n    import_parser.add_argument(\n        \"--version-aware\",\n        action=\"store_true\",\n        default=False,\n        help=\"Import using cloud versioning. Implied if the URL contains a version ID.\",\n    )\n    import_parser.add_argument(\n        \"--fs-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=\"Config options for the target url.\",\n    )\n    import_parser.set_defaults(func=CmdImportUrl)\n"
  },
  {
    "path": "dvc/commands/init.py",
    "content": "import colorama\n\nfrom dvc import analytics\nfrom dvc.cli import formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.utils import boxify\nfrom dvc.utils import format_link as fmt_link\n\nlogger = logger.getChild(__name__)\n\n\ndef _welcome_message():\n    from dvc.ui import ui\n\n    if analytics.is_enabled():\n        ui.write(\n            boxify(\n                \"DVC has enabled anonymous aggregate usage analytics.\\n\"\n                \"Read the analytics documentation (and how to opt-out) here:\\n\"\n                + fmt_link(\"https://dvc.org/doc/user-guide/analytics\"),\n                border_color=\"red\",\n            )\n        )\n\n    msg = (\n        \"{yellow}What's next?{nc}\\n\"\n        \"{yellow}------------{nc}\\n\"\n        f\"- Check out the documentation: {fmt_link('https://dvc.org/doc')}\\n\"\n        f\"- Get help and share ideas: {fmt_link('https://dvc.org/chat')}\\n\"\n        f\"- Star us on GitHub: {fmt_link('https://github.com/treeverse/dvc')}\"\n    ).format(yellow=colorama.Fore.YELLOW, nc=colorama.Fore.RESET)\n\n    ui.write(msg)\n\n\nclass CmdInit(CmdBaseNoRepo):\n    def run(self):\n        from dvc.exceptions import InitError\n        from dvc.repo import Repo\n\n        try:\n            with Repo.init(\n                self.args.directory,\n                no_scm=self.args.no_scm,\n                force=self.args.force,\n                subdir=self.args.subdir,\n            ) as repo:\n                self.config = repo.config\n                _welcome_message()\n        except InitError:\n            logger.exception(\"failed to initiate DVC\")\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    \"\"\"Setup parser for `dvc init`.\"\"\"\n    INIT_HELP = \"Initialize DVC repository.\"\n    INIT_DESCRIPTION = (\n        \"Initialize DVC repository in the given directory (defaults to the 
current \"\n        \"working directory).\\n\"\n        \"Expects directory to be a Git repository unless --no-scm option is specified.\"\n    )\n\n    init_parser = subparsers.add_parser(\n        \"init\",\n        parents=[parent_parser],\n        description=append_doc_link(INIT_DESCRIPTION, \"init\"),\n        help=INIT_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    init_parser.add_argument(\n        \"directory\",\n        nargs=\"?\",\n        default=\".\",\n        help=(\n            \"Directory to initialize DVC in. Defaults to the current working directory.\"\n        ),\n    )\n    init_parser.add_argument(\n        \"--no-scm\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Initiate DVC in directory that is not tracked by any SCM tool (e.g. Git).\"\n        ),\n    )\n    init_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Overwrite existing '.dvc/' directory. This operation removes local cache.\"\n        ),\n    )\n    init_parser.add_argument(\n        \"--subdir\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Necessary for running this command inside a subdirectory of a \"\n            \"parent SCM repository.\"\n        ),\n    )\n    init_parser.set_defaults(func=CmdInit)\n"
  },
  {
    "path": "dvc/commands/install.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdInstall(CmdBase):\n    def run(self):\n        try:\n            self.repo.install(self.args.use_pre_commit_tool)\n        except DvcException:\n            logger.exception(\"failed to install DVC Git hooks\")\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    INSTALL_HELP = \"Install DVC git hooks into the repository.\"\n    install_parser = subparsers.add_parser(\n        \"install\",\n        parents=[parent_parser],\n        description=append_doc_link(INSTALL_HELP, \"install\"),\n        help=INSTALL_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    install_parser.add_argument(\n        \"--use-pre-commit-tool\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Install DVC hooks using pre-commit \"\n            \"(https://pre-commit.com) if it is installed.\"\n        ),\n    )\n    install_parser.set_defaults(func=CmdInstall)\n"
  },
  {
    "path": "dvc/commands/ls/__init__.py",
    "content": "from typing import Callable\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.commands.ls.ls_colors import LsColors\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\ndef _get_formatter(with_color: bool = False) -> Callable[[dict], str]:\n    def fmt(entry: dict) -> str:\n        return entry[\"path\"]\n\n    if with_color:\n        ls_colors = LsColors()\n        return ls_colors.format\n\n    return fmt\n\n\ndef _format_entry(entry, name, with_size=True, with_hash=False):\n    from dvc.utils.humanize import naturalsize\n\n    ret = []\n    if with_size:\n        size = entry.get(\"size\")\n        if size is None or (size <= 0 and entry.get(\"isdir\")):\n            size = \"\"\n        else:\n            size = naturalsize(size)\n        ret.append(size)\n    if with_hash:\n        md5 = entry.get(\"md5\", \"\")\n        ret.append(md5)\n    ret.append(name)\n    return ret\n\n\ndef show_entries(entries, with_color=False, with_size=False, with_hash=False):\n    fmt = _get_formatter(with_color)\n    if with_size or with_hash:\n        colalign = (\"right\",) if with_size else None\n        ui.table(\n            [\n                _format_entry(\n                    entry,\n                    fmt(entry),\n                    with_size=with_size,\n                    with_hash=with_hash,\n                )\n                for entry in entries\n            ],\n            colalign=colalign,\n        )\n        return\n\n    # NOTE: this is faster than ui.table for very large number of entries\n    ui.write(\"\\n\".join(fmt(entry) for entry in entries))\n\n\nclass TreePart:\n    Edge = \"├── \"\n    Line = \"│   \"\n    Corner = \"└── \"\n    Blank = \"    \"\n\n\ndef _build_tree_structure(\n    entries, with_color=False, with_size=False, with_hash=False, _depth=0, 
_prefix=\"\"\n):\n    rows = []\n    fmt = _get_formatter(with_color)\n\n    num_entries = len(entries)\n    for i, (name, entry) in enumerate(entries.items()):\n        entry[\"path\"] = name\n        is_last = i >= num_entries - 1\n        tree_part = \"\"\n        if _depth > 0:\n            tree_part = TreePart.Corner if is_last else TreePart.Edge\n\n        row = _format_entry(\n            entry,\n            _prefix + tree_part + fmt(entry),\n            with_size=with_size,\n            with_hash=with_hash,\n        )\n        rows.append(row)\n\n        if contents := entry.get(\"contents\"):\n            new_prefix = _prefix\n            if _depth > 0:\n                new_prefix += TreePart.Blank if is_last else TreePart.Line\n            new_rows = _build_tree_structure(\n                contents,\n                with_color=with_color,\n                with_size=with_size,\n                with_hash=with_hash,\n                _depth=_depth + 1,\n                _prefix=new_prefix,\n            )\n            rows.extend(new_rows)\n\n    return rows\n\n\ndef show_tree(entries, with_color=False, with_size=False, with_hash=False):\n    import tabulate\n\n    rows = _build_tree_structure(\n        entries,\n        with_color=with_color,\n        with_size=with_size,\n        with_hash=with_hash,\n    )\n\n    colalign = (\"right\",) if with_size else None\n\n    _orig = tabulate.PRESERVE_WHITESPACE\n    tabulate.PRESERVE_WHITESPACE = True\n    try:\n        ui.table(rows, colalign=colalign)\n    finally:\n        tabulate.PRESERVE_WHITESPACE = _orig\n\n\nclass CmdList(CmdBaseNoRepo):\n    def _show_tree(self):\n        from dvc.repo.ls import ls_tree\n\n        entries = ls_tree(\n            self.args.url,\n            self.args.path,\n            rev=self.args.rev,\n            dvc_only=self.args.dvc_only,\n            config=self.args.config,\n            remote=self.args.remote,\n            remote_config=self.args.remote_config,\n            
maxdepth=self.args.level,\n        )\n        show_tree(\n            entries,\n            with_color=True,\n            with_size=self.args.size,\n            with_hash=self.args.show_hash,\n        )\n        return 0\n\n    def _show_list(self):\n        from dvc.repo import Repo\n\n        entries = Repo.ls(\n            self.args.url,\n            self.args.path,\n            rev=self.args.rev,\n            recursive=self.args.recursive,\n            dvc_only=self.args.dvc_only,\n            config=self.args.config,\n            remote=self.args.remote,\n            remote_config=self.args.remote_config,\n            maxdepth=self.args.level,\n        )\n        if self.args.json:\n            ui.write_json(entries)\n        elif entries:\n            show_entries(\n                entries,\n                with_color=True,\n                with_size=self.args.size,\n                with_hash=self.args.show_hash,\n            )\n        return 0\n\n    def run(self):\n        if self.args.tree and self.args.json:\n            raise DvcException(\"Cannot use --tree and --json options together.\")\n\n        try:\n            if self.args.tree:\n                return self._show_tree()\n            return self._show_list()\n        except FileNotFoundError:\n            logger.exception(\"\")\n            return 1\n        except DvcException:\n            logger.exception(\"failed to list '%s'\", self.args.url)\n            return 1\n\n\ndef add_parser(subparsers, parent_parser):\n    LIST_HELP = (\n        \"List repository contents, including files\"\n        \" and directories tracked by DVC and by Git.\"\n    )\n    list_parser = subparsers.add_parser(\n        \"list\",\n        aliases=[\"ls\"],\n        parents=[parent_parser],\n        description=append_doc_link(LIST_HELP, \"list\"),\n        help=LIST_HELP,\n        formatter_class=formatter.RawTextHelpFormatter,\n    )\n    list_parser.add_argument(\"url\", help=\"Location of DVC repository to 
list\")\n    list_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        help=\"Recursively list files.\",\n    )\n    list_parser.add_argument(\n        \"-T\",\n        \"--tree\",\n        action=\"store_true\",\n        help=\"Recurse into directories as a tree.\",\n    )\n    list_parser.add_argument(\n        \"-L\",\n        \"--level\",\n        metavar=\"depth\",\n        type=int,\n        help=\"Limit the depth of recursion.\",\n    )\n    list_parser.add_argument(\n        \"--dvc-only\", action=\"store_true\", help=\"Show only DVC outputs.\"\n    )\n    list_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        help=\"Show output in JSON format.\",\n    )\n    list_parser.add_argument(\n        \"--rev\",\n        nargs=\"?\",\n        help=\"Git revision (e.g. SHA, branch, tag)\",\n        metavar=\"<commit>\",\n    )\n    list_parser.add_argument(\n        \"--config\",\n        type=str,\n        help=(\n            \"Path to a config file that will be merged with the config \"\n            \"in the target repository.\"\n        ),\n    )\n    list_parser.add_argument(\n        \"--remote\",\n        type=str,\n        help=\"Remote name to set as a default in the target repository.\",\n    ).complete = completion.REMOTE\n    list_parser.add_argument(\n        \"--remote-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=(\n            \"Remote config options to merge with a remote's config (default or one \"\n            \"specified by '--remote') in the target repository.\"\n        ),\n    )\n    list_parser.add_argument(\"--size\", action=\"store_true\", help=\"Show sizes.\")\n    list_parser.add_argument(\n        \"--show-hash\",\n        help=\"Display hash value for each item.\",\n        action=\"store_true\",\n        default=False,\n    )\n    list_parser.add_argument(\n        \"path\",\n        nargs=\"?\",\n        
help=\"Path to directory within the repository to list outputs for\",\n    ).complete = completion.DIR\n    list_parser.set_defaults(func=CmdList)\n"
  },
  {
    "path": "dvc/commands/ls/ls_colors.py",
    "content": "import os\n\n\nclass LsColors:\n    default = \"rs=0:di=01;34:ex=01;32\"\n\n    def __init__(self, lscolors=None):\n        self._extensions = {}\n        self._codes = {}\n        self._load(lscolors or os.environ.get(\"LS_COLORS\") or LsColors.default)\n\n    def _load(self, lscolors):\n        for item in lscolors.split(\":\"):\n            try:\n                code, color = item.split(\"=\", 1)\n            except ValueError:\n                continue\n            if code.startswith(\"*.\"):\n                self._extensions[code[1:]] = color\n            else:\n                self._codes[code] = color\n\n    def format(self, entry):\n        text = entry[\"path\"]\n\n        if entry.get(\"isout\", False) and \"out\" in self._codes:\n            return self._format(text, code=\"out\")\n\n        if entry.get(\"isdir\", False):\n            return self._format(text, code=\"di\")\n\n        if entry.get(\"isexec\", False):\n            return self._format(text, code=\"ex\")\n\n        stem, ext = os.path.splitext(text)\n        if not ext and stem.startswith(\".\"):\n            ext = stem\n        return self._format(text, ext=ext)\n\n    def _format(self, text, code=None, ext=None):\n        val = None\n        if ext:\n            val = self._extensions.get(ext, None)\n        if code:\n            val = self._codes.get(code, None)\n\n        if not val:\n            return text\n        rs = self._codes.get(\"rs\", 0)\n        return f\"\\033[{val}m{text}\\033[{rs}m\"\n"
  },
  {
    "path": "dvc/commands/ls_url.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import DictAction, append_doc_link\nfrom dvc.log import logger\n\nfrom .ls import show_entries, show_tree\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdListUrl(CmdBaseNoRepo):\n    def _show_tree(self, config):\n        from dvc.fs import parse_external_url\n        from dvc.repo.ls import _ls_tree\n\n        fs, fs_path = parse_external_url(\n            self.args.url, fs_config=self.args.fs_config, config=config\n        )\n        entries = _ls_tree(fs, fs_path, maxdepth=self.args.level)\n        show_tree(entries, with_color=True, with_size=self.args.size)\n        return 0\n\n    def _show_list(self, config):\n        from dvc.repo import Repo\n\n        entries = Repo.ls_url(\n            self.args.url,\n            recursive=self.args.recursive,\n            maxdepth=self.args.level,\n            fs_config=self.args.fs_config,\n            config=config,\n        )\n        if entries:\n            show_entries(entries, with_color=True, with_size=self.args.size)\n        return 0\n\n    def run(self):\n        from dvc.config import Config\n\n        config = Config.from_cwd()\n        if self.args.tree:\n            return self._show_tree(config=config)\n        return self._show_list(config=config)\n\n\ndef add_parser(subparsers, parent_parser):\n    LS_HELP = \"List directory contents from URL.\"\n    lsurl_parser = subparsers.add_parser(\n        \"list-url\",\n        aliases=[\"ls-url\"],\n        parents=[parent_parser],\n        description=append_doc_link(LS_HELP, \"list-url\"),\n        help=LS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    lsurl_parser.add_argument(\n        \"url\", help=\"See `dvc import-url -h` for full list of supported URLs.\"\n    )\n    lsurl_parser.add_argument(\n        \"-R\", \"--recursive\", action=\"store_true\", help=\"Recursively list files.\"\n    )\n    
lsurl_parser.add_argument(\n        \"-T\",\n        \"--tree\",\n        action=\"store_true\",\n        help=\"Recurse into directories as a tree.\",\n    )\n    lsurl_parser.add_argument(\n        \"-L\",\n        \"--level\",\n        metavar=\"depth\",\n        type=int,\n        help=\"Limit the depth of recursion.\",\n    )\n    lsurl_parser.add_argument(\"--size\", action=\"store_true\", help=\"Show sizes.\")\n    lsurl_parser.add_argument(\n        \"--fs-config\",\n        type=str,\n        nargs=\"*\",\n        action=DictAction,\n        help=\"Config options for the target url.\",\n    )\n    lsurl_parser.set_defaults(func=CmdListUrl)\n"
  },
  {
    "path": "dvc/commands/metrics.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc.utils.serialize import encode_exception\n\nlogger = logger.getChild(__name__)\n\n\nDEFAULT_PRECISION = 5\n\n\nclass CmdMetricsBase(CmdBase):\n    UNINITIALIZED = True\n\n\nclass CmdMetricsShow(CmdMetricsBase):\n    def run(self):\n        from dvc.repo.metrics.show import to_relpath\n        from dvc.utils import errored_revisions\n\n        metrics = self.repo.metrics.show(\n            self.args.targets,\n            all_branches=self.args.all_branches,\n            all_tags=self.args.all_tags,\n            all_commits=self.args.all_commits,\n        )\n        metrics = {\n            k: to_relpath(self.repo.fs, self.repo.root_dir, v)\n            for k, v in metrics.items()\n        }\n\n        if errored := errored_revisions(metrics):\n            ui.error_write(\n                \"DVC failed to load some metrics for following revisions:\"\n                f\" '{', '.join(errored)}'.\"\n            )\n\n        if self.args.json:\n            ui.write_json(metrics, default=encode_exception)\n        else:\n            from dvc.compare import show_metrics\n\n            show_metrics(\n                metrics,\n                markdown=self.args.markdown,\n                all_branches=self.args.all_branches,\n                all_tags=self.args.all_tags,\n                all_commits=self.args.all_commits,\n                precision=self.args.precision or DEFAULT_PRECISION,\n                round_digits=True,\n            )\n\n        return 0\n\n\nclass CmdMetricsDiff(CmdMetricsBase):\n    def run(self):\n        import os\n        from os.path import relpath\n\n        diff_result = self.repo.metrics.diff(\n            a_rev=self.args.a_rev,\n            b_rev=self.args.b_rev,\n            targets=self.args.targets,\n            all=self.args.all,\n        )\n\n     
   errored = [rev for rev, err in diff_result.get(\"errors\", {}).items() if err]\n        if errored:\n            ui.error_write(\n                \"DVC failed to load some metrics for following revisions:\"\n                f\" '{', '.join(errored)}'.\"\n            )\n\n        start = relpath(os.getcwd(), self.repo.root_dir)\n        diff = diff_result.get(\"diff\", {})\n        diff = {relpath(path, start): result for path, result in diff.items()}\n\n        if self.args.json:\n            ui.write_json(diff)\n        else:\n            from dvc.compare import show_diff\n\n            show_diff(\n                diff,\n                title=\"Metric\",\n                markdown=self.args.markdown,\n                no_path=self.args.no_path,\n                precision=self.args.precision or DEFAULT_PRECISION,\n                round_digits=True,\n                a_rev=self.args.a_rev,\n                b_rev=self.args.b_rev,\n            )\n\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    METRICS_HELP = \"Commands to display and compare metrics.\"\n\n    metrics_parser = subparsers.add_parser(\n        \"metrics\",\n        parents=[parent_parser],\n        description=append_doc_link(METRICS_HELP, \"metrics\"),\n        help=METRICS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    metrics_subparsers = metrics_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc metrics CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    METRICS_SHOW_HELP = \"Print metrics, with optional formatting.\"\n    metrics_show_parser = metrics_subparsers.add_parser(\n        \"show\",\n        parents=[parent_parser],\n        description=append_doc_link(METRICS_SHOW_HELP, \"metrics/show\"),\n        help=METRICS_SHOW_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    metrics_show_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n  
      help=(\n            \"Limit command scope to these metrics files. Using -R, \"\n            \"directories to search metrics files in can also be given.\"\n        ),\n    ).complete = completion.FILE\n    metrics_show_parser.add_argument(\n        \"-a\",\n        \"--all-branches\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show metrics for all branches.\",\n    )\n    metrics_show_parser.add_argument(\n        \"-T\",\n        \"--all-tags\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show metrics for all tags.\",\n    )\n    metrics_show_parser.add_argument(\n        \"-A\",\n        \"--all-commits\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show metrics for all commits.\",\n    )\n    metrics_show_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show output in JSON format.\",\n    )\n    metrics_show_parser.add_argument(\n        \"--md\",\n        action=\"store_true\",\n        default=False,\n        dest=\"markdown\",\n        help=\"Show tabulated output in the Markdown format (GFM).\",\n    )\n    metrics_show_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"If any target is a directory, recursively search and process \"\n            \"metrics files.\"\n        ),\n    )\n    metrics_show_parser.add_argument(\n        \"--precision\",\n        type=int,\n        help=(\n            \"Round metrics to `n` digits precision after the decimal point. 
\"\n            f\"Rounds to {DEFAULT_PRECISION} digits by default.\"\n        ),\n        metavar=\"<n>\",\n    )\n    metrics_show_parser.set_defaults(func=CmdMetricsShow)\n\n    METRICS_DIFF_HELP = (\n        \"Show changes in metrics between commits in the DVC repository, or \"\n        \"between a commit and the workspace.\"\n    )\n    metrics_diff_parser = metrics_subparsers.add_parser(\n        \"diff\",\n        parents=[parent_parser],\n        description=append_doc_link(METRICS_DIFF_HELP, \"metrics/diff\"),\n        help=METRICS_DIFF_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    metrics_diff_parser.add_argument(\n        \"a_rev\",\n        nargs=\"?\",\n        help=\"Old Git commit to compare (defaults to HEAD)\",\n        default=\"HEAD\",\n    )\n    metrics_diff_parser.add_argument(\n        \"b_rev\",\n        default=\"workspace\",\n        nargs=\"?\",\n        help=\"New Git commit to compare (defaults to the current workspace)\",\n    )\n    metrics_diff_parser.add_argument(\n        \"--targets\",\n        nargs=\"*\",\n        help=(\n            \"Specific metrics file(s) to compare \"\n            \"(even if not found as `metrics` in `dvc.yaml`). 
\"\n            \"Using -R, directories to search metrics files in \"\n            \"can also be given.\"\n            \"Shows all tracked metrics by default.\"\n        ),\n        metavar=\"<paths>\",\n    ).complete = completion.FILE\n    metrics_diff_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"If any target is a directory, recursively search and process \"\n            \"metrics files.\"\n        ),\n    )\n    metrics_diff_parser.add_argument(\n        \"--all\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show unchanged metrics as well.\",\n    )\n    metrics_diff_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show output in JSON format.\",\n    )\n    metrics_diff_parser.add_argument(\n        \"--md\",\n        action=\"store_true\",\n        default=False,\n        dest=\"markdown\",\n        help=\"Show tabulated output in the Markdown format (GFM).\",\n    )\n    metrics_diff_parser.add_argument(\n        \"--no-path\",\n        action=\"store_true\",\n        default=False,\n        help=\"Don't show metric path.\",\n    )\n    metrics_diff_parser.add_argument(\n        \"--precision\",\n        type=int,\n        help=(\n            \"Round metrics to `n` digits precision after the decimal point. \"\n            f\"Rounds to {DEFAULT_PRECISION} digits by default.\"\n        ),\n        metavar=\"<n>\",\n    )\n    metrics_diff_parser.set_defaults(func=CmdMetricsDiff)\n"
  },
  {
    "path": "dvc/commands/move.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdMove(CmdBase):\n    def run(self):\n        try:\n            self.repo.move(self.args.src, self.args.dst)\n        except DvcException:\n            msg = f\"failed to move '{self.args.src}' -> '{self.args.dst}'\"\n            logger.exception(msg)\n            return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    MOVE_HELP = \"Rename or move a DVC controlled data file or a directory.\"\n    MOVE_DESCRIPTION = (\n        \"Rename or move a DVC controlled data file or a directory.\\n\"\n        \"It renames and modifies the corresponding .dvc file to reflect the\"\n        \" changes.\"\n    )\n\n    move_parser = subparsers.add_parser(\n        \"move\",\n        aliases=[\"mv\"],\n        parents=[parent_parser],\n        description=append_doc_link(MOVE_DESCRIPTION, \"move\"),\n        help=MOVE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    move_parser.add_argument(\n        \"src\", help=\"Source path to a data file or directory.\"\n    ).complete = completion.FILE\n    move_parser.add_argument(\"dst\", help=\"Destination path.\").complete = completion.FILE\n    move_parser.set_defaults(func=CmdMove)\n"
  },
  {
    "path": "dvc/commands/params.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdParamsDiff(CmdBase):\n    UNINITIALIZED = True\n\n    def run(self):\n        import os\n        from os.path import relpath\n\n        diff_result = self.repo.params.diff(\n            a_rev=self.args.a_rev,\n            b_rev=self.args.b_rev,\n            targets=self.args.targets,\n            all=self.args.all,\n            deps_only=self.args.deps,\n        )\n\n        errored = [rev for rev, err in diff_result.get(\"errors\", {}).items() if err]\n        if errored:\n            ui.error_write(\n                \"DVC failed to load some metrics for following revisions:\"\n                f\" '{', '.join(errored)}'.\"\n            )\n\n        start = relpath(os.getcwd(), self.repo.root_dir)\n        diff = diff_result.get(\"diff\", {})\n        diff = {relpath(path, start): result for path, result in diff.items()}\n\n        if self.args.json:\n            ui.write_json(diff)\n        else:\n            from dvc.compare import show_diff\n\n            show_diff(\n                diff,\n                title=\"Param\",\n                markdown=self.args.markdown,\n                no_path=self.args.no_path,\n                show_changes=False,\n                a_rev=self.args.a_rev,\n                b_rev=self.args.b_rev,\n            )\n\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    PARAMS_HELP = \"Commands to display params.\"\n\n    params_parser = subparsers.add_parser(\n        \"params\",\n        parents=[parent_parser],\n        description=append_doc_link(PARAMS_HELP, \"params\"),\n        help=PARAMS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    params_subparsers = params_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc params 
CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    PARAMS_DIFF_HELP = (\n        \"Show changes in params between commits in the DVC repository, or \"\n        \"between a commit and the workspace.\"\n    )\n    params_diff_parser = params_subparsers.add_parser(\n        \"diff\",\n        parents=[parent_parser],\n        description=append_doc_link(PARAMS_DIFF_HELP, \"params/diff\"),\n        help=PARAMS_DIFF_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    params_diff_parser.add_argument(\n        \"a_rev\",\n        nargs=\"?\",\n        default=\"HEAD\",\n        help=\"Old Git commit to compare (defaults to HEAD)\",\n    )\n    params_diff_parser.add_argument(\n        \"b_rev\",\n        default=\"workspace\",\n        nargs=\"?\",\n        help=\"New Git commit to compare (defaults to the current workspace)\",\n    )\n    params_diff_parser.add_argument(\n        \"--targets\",\n        nargs=\"*\",\n        help=(\n            \"Specific params file(s) to compare \"\n            \"(even if not found as `params` in `dvc.yaml`). 
\"\n            \"Shows all tracked params by default.\"\n        ),\n        metavar=\"<paths>\",\n    ).complete = completion.FILE\n    params_diff_parser.add_argument(\n        \"--all\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show unchanged params as well.\",\n    )\n    params_diff_parser.add_argument(\n        \"--deps\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show only params that are stage dependencies.\",\n    )\n    params_diff_parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show output in JSON format.\",\n    )\n    params_diff_parser.add_argument(\n        \"--md\",\n        action=\"store_true\",\n        default=False,\n        dest=\"markdown\",\n        help=\"Show tabulated output in the Markdown format (GFM).\",\n    )\n    params_diff_parser.add_argument(\n        \"--no-path\",\n        action=\"store_true\",\n        default=False,\n        help=\"Don't show params path.\",\n    )\n    params_diff_parser.set_defaults(func=CmdParamsDiff)\n"
  },
  {
    "path": "dvc/commands/plots.py",
    "content": "import argparse\nimport os\nfrom typing import TYPE_CHECKING, Optional\n\nfrom funcy import compact, first, get_in\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc.utils import format_link\n\nif TYPE_CHECKING:\n    from dvc.render.match import RendererWithErrors\n\n\nlogger = logger.getChild(__name__)\n\n\ndef _show_json(\n    renderers_with_errors: list[\"RendererWithErrors\"],\n    split=False,\n    errors: Optional[dict[str, Exception]] = None,\n):\n    from dvc.render.convert import to_json\n    from dvc.utils.serialize import encode_exception\n\n    all_errors: list[dict] = []\n    data = {}\n\n    for renderer, src_errors, def_errors in renderers_with_errors:\n        name = renderer.name\n        data[name] = to_json(renderer, split)\n        all_errors.extend(\n            {\"name\": name, \"rev\": rev, \"source\": source, **encode_exception(e)}\n            for rev, per_rev_src_errors in src_errors.items()\n            for source, e in per_rev_src_errors.items()\n        )\n        all_errors.extend(\n            {\"name\": name, \"rev\": rev, **encode_exception(e)}\n            for rev, e in def_errors.items()\n        )\n\n    # these errors are not tied to any renderers\n    errors = errors or {}\n    all_errors.extend({\"rev\": rev, **encode_exception(e)} for rev, e in errors.items())\n\n    ui.write_json(compact({\"errors\": all_errors, \"data\": data}), highlight=False)\n\n\nclass CmdPlots(CmdBase):\n    def _func(self, *args, **kwargs):\n        raise NotImplementedError\n\n    def _props(self):\n        from dvc.schema import PLOT_PROPS\n\n        # Pass only props specified by user, to not shadow ones from plot def\n        props = {p: getattr(self.args, p) for p in PLOT_PROPS}\n        return {k: v for k, v in props.items() if v is not None}\n\n    def 
_html_template_path(self):\n        html_template_path = self.args.html_template\n        if not html_template_path:\n            html_template_path = self.repo.config.get(\"plots\", {}).get(\n                \"html_template\", None\n            )\n            if html_template_path and not os.path.isabs(html_template_path):\n                assert self.repo.dvc_dir\n                html_template_path = os.path.join(self.repo.dvc_dir, html_template_path)\n        return html_template_path\n\n    def run(self) -> int:  # noqa: C901, PLR0911\n        from pathlib import Path\n\n        from dvc.render.match import match_defs_renderers\n        from dvc_render import render_html\n\n        if self.args.show_vega:\n            if not self.args.targets:\n                logger.error(\"please specify a target for `--show-vega`\")\n                return 1\n            if len(self.args.targets) > 1:\n                logger.error(\"you can only specify one target for `--show-vega`\")\n                return 1\n            if self.args.json:\n                logger.error(\n                    \"'--show-vega' and '--json' are mutually exclusive options.\"\n                )\n                return 1\n\n        try:\n            plots_data = self._func(targets=self.args.targets, props=self._props())\n\n            if not plots_data and not self.args.json:\n                ui.error_write(\n                    \"No plots were loaded, visualization file will not be created.\"\n                )\n\n            out: str = self.args.out or self.repo.config.get(\"plots\", {}).get(\n                \"out_dir\", \"dvc_plots\"\n            )\n\n            renderers_out = out if self.args.json else os.path.join(out, \"static\")\n            renderers_with_errors = match_defs_renderers(\n                data=plots_data,\n                out=renderers_out,\n                templates_dir=self.repo.plots.templates_dir,\n            )\n            if self.args.json:\n                errors = 
compact(\n                    {\n                        rev: get_in(data, [\"definitions\", \"error\"])\n                        for rev, data in plots_data.items()\n                    }\n                )\n                _show_json(renderers_with_errors, self.args.split, errors=errors)\n                return 0\n\n            renderers = [r.renderer for r in renderers_with_errors]\n            if self.args.show_vega:\n                renderer = first(filter(lambda r: r.TYPE == \"vega\", renderers))\n                if renderer:\n                    ui.write_json(renderer.get_filled_template())\n                return 0\n\n            output_file: Path = (Path.cwd() / out).resolve() / \"index.html\"\n\n            if renderers:\n                render_html(\n                    renderers=renderers,\n                    output_file=output_file,\n                    html_template=self._html_template_path(),\n                )\n\n                ui.write(output_file.as_uri())\n                auto_open = self.repo.config[\"plots\"].get(\"auto_open\", False)\n                if self.args.open or auto_open:\n                    if not auto_open:\n                        ui.write(\n                            \"To enable auto opening, you can run:\\n\"\n                            \"\\n\"\n                            \"\\tdvc config plots.auto_open true\"\n                        )\n                    return ui.open_browser(output_file)\n\n            return 0\n\n        except DvcException:\n            logger.exception(\"\")\n            return 1\n\n\nclass CmdPlotsShow(CmdPlots):\n    UNINITIALIZED = True\n\n    def _func(self, *args, **kwargs):\n        return self.repo.plots.show(*args, **kwargs)\n\n\nclass CmdPlotsDiff(CmdPlots):\n    UNINITIALIZED = True\n\n    def _func(self, *args, **kwargs):\n        return self.repo.plots.diff(\n            *args,\n            revs=self.args.revisions,\n            experiment=self.args.experiment,\n            **kwargs,\n  
      )\n\n\nclass CmdPlotsModify(CmdPlots):\n    def run(self):\n        self.repo.plots.modify(\n            self.args.target, props=self._props(), unset=self.args.unset\n        )\n        return 0\n\n\nclass CmdPlotsTemplates(CmdBase):\n    def run(self):\n        from dvc.exceptions import InvalidArgumentError\n        from dvc_render.vega_templates import TEMPLATES\n\n        try:\n            target = self.args.template\n            if target:\n                for template in TEMPLATES:\n                    if target == template.DEFAULT_NAME:\n                        ui.write_json(template.DEFAULT_CONTENT)\n                        return 0\n                raise InvalidArgumentError(f\"Unexpected template: {target}.\")\n\n            for template in TEMPLATES:\n                ui.write(template.DEFAULT_NAME)\n\n            return 0\n        except DvcException:\n            logger.exception(\"\")\n            return 1\n\n\ndef add_parser(subparsers, parent_parser):\n    PLOTS_HELP = \"Commands to visualize and compare plot data.\"\n\n    plots_parser = subparsers.add_parser(\n        \"plots\",\n        parents=[parent_parser],\n        description=append_doc_link(PLOTS_HELP, \"plots\"),\n        help=PLOTS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    plots_subparsers = plots_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc plots CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    SHOW_HELP = (\n        \"Generate plots from target files or from `plots` definitions in `dvc.yaml`.\"\n    )\n    plots_show_parser = plots_subparsers.add_parser(\n        \"show\",\n        parents=[parent_parser],\n        description=append_doc_link(SHOW_HELP, \"plots/show\"),\n        help=SHOW_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    plots_show_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n        help=(\n            
\"Plots files or plot IDs from `dvc.yaml` to visualize. \"\n            \"Shows all plots by default.\"\n        ),\n    ).complete = completion.FILE\n    _add_props_arguments(plots_show_parser)\n    _add_output_argument(plots_show_parser)\n    _add_ui_arguments(plots_show_parser)\n    plots_show_parser.set_defaults(func=CmdPlotsShow)\n\n    PLOTS_DIFF_HELP = (\n        \"Show multiple versions of a plot by overlaying them in a single image.\"\n    )\n    plots_diff_parser = plots_subparsers.add_parser(\n        \"diff\",\n        parents=[parent_parser],\n        description=append_doc_link(PLOTS_DIFF_HELP, \"plots/diff\"),\n        help=PLOTS_DIFF_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    plots_diff_parser.add_argument(\n        \"--targets\",\n        nargs=\"*\",\n        help=(\n            \"Specific plots to visualize. \"\n            \"Accepts any file path or plot name from `dvc.yaml` file. \"\n            \"Shows all tracked plots by default.\"\n        ),\n        metavar=\"<paths>\",\n    ).complete = completion.FILE\n    plots_diff_parser.add_argument(\n        \"-e\",\n        \"--experiment\",\n        action=\"store_true\",\n        default=False,\n        help=argparse.SUPPRESS,\n    )\n    plots_diff_parser.add_argument(\n        \"revisions\", nargs=\"*\", default=None, help=\"Git commits to plot from\"\n    )\n    _add_props_arguments(plots_diff_parser)\n    _add_output_argument(plots_diff_parser)\n    _add_ui_arguments(plots_diff_parser)\n    plots_diff_parser.set_defaults(func=CmdPlotsDiff)\n\n    PLOTS_MODIFY_HELP = (\n        \"Modify display properties of data-series plots \"\n        \"defined in stages (has no effect on image plots).\"\n    )\n    plots_modify_parser = plots_subparsers.add_parser(\n        \"modify\",\n        parents=[parent_parser],\n        description=append_doc_link(PLOTS_MODIFY_HELP, \"plots/modify\"),\n        help=PLOTS_MODIFY_HELP,\n        
formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    plots_modify_parser.add_argument(\n        \"target\",\n        help=\"Plots file to set properties for (defined at the stage level).\",\n    ).complete = completion.FILE\n    _add_props_arguments(plots_modify_parser)\n    plots_modify_parser.add_argument(\n        \"--unset\",\n        nargs=\"*\",\n        metavar=\"<property>\",\n        help=\"Unset one or more display properties.\",\n    )\n    plots_modify_parser.set_defaults(func=CmdPlotsModify)\n\n    TEMPLATES_HELP = \"List built-in plots templates or show JSON specification for one.\"\n    plots_templates_parser = plots_subparsers.add_parser(\n        \"templates\",\n        parents=[parent_parser],\n        description=append_doc_link(TEMPLATES_HELP, \"plots/templates\"),\n        help=TEMPLATES_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    plots_templates_parser.add_argument(\n        \"template\",\n        default=None,\n        nargs=\"?\",\n        help=(\n            \"Template for which to show JSON specification. \"\n            \"List all template names by default.\"\n        ),\n    )\n    plots_templates_parser.set_defaults(func=CmdPlotsTemplates)\n\n\ndef _add_props_arguments(parser):\n    parser.add_argument(\n        \"-t\",\n        \"--template\",\n        nargs=\"?\",\n        default=None,\n        help=\"Special JSON or HTML schema file to inject with the data. 
See {}\".format(\n            format_link(\"https://man.dvc.org/plots#plot-templates\")\n        ),\n        metavar=\"<path>\",\n    ).complete = completion.FILE\n    parser.add_argument(\n        \"-x\", default=None, help=\"Field name for X axis.\", metavar=\"<field>\"\n    )\n    parser.add_argument(\n        \"-y\", default=None, help=\"Field name for Y axis.\", metavar=\"<field>\"\n    )\n    parser.add_argument(\n        \"--no-header\",\n        action=\"store_false\",\n        dest=\"header\",\n        default=None,  # Use default None to distinguish when it's not used\n        help=\"Provided CSV or TSV datafile does not have a header.\",\n    )\n    parser.add_argument(\"--title\", default=None, metavar=\"<text>\", help=\"Plot title.\")\n    parser.add_argument(\n        \"--x-label\", default=None, help=\"X axis label\", metavar=\"<text>\"\n    )\n    parser.add_argument(\n        \"--y-label\", default=None, help=\"Y axis label\", metavar=\"<text>\"\n    )\n\n\ndef _add_output_argument(parser, typ=\"plots\"):\n    parser.add_argument(\n        \"-o\",\n        \"--out\",\n        default=None,\n        help=f\"Directory to save {typ} to.\",\n        metavar=\"<path>\",\n    ).complete = completion.DIR\n\n\ndef _add_ui_arguments(parser):\n    parser.add_argument(\n        \"--show-vega\",\n        action=\"store_true\",\n        default=False,\n        help=\"Show output in Vega format.\",\n    )\n    parser.add_argument(\n        \"--json\",\n        action=\"store_true\",\n        default=False,\n        help=argparse.SUPPRESS,\n    )\n    parser.add_argument(\n        \"--split\", action=\"store_true\", default=False, help=argparse.SUPPRESS\n    )\n    parser.add_argument(\n        \"--open\",\n        action=\"store_true\",\n        default=False,\n        help=\"Open plot file directly in the browser.\",\n    )\n    parser.add_argument(\n        \"--html-template\",\n        default=None,\n        help=\"Custom HTML template for VEGA 
visualization.\",\n        metavar=\"<path>\",\n    )\n"
  },
  {
    "path": "dvc/commands/queue/__init__.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.queue import kill, logs, remove, start, status, stop\n\nSUB_COMMANDS = [start, stop, status, logs, remove, kill]\n\n\ndef add_parser(subparsers, parent_parser):\n    QUEUE_HELP = \"Commands to manage experiments queue.\"\n\n    queue_parser = subparsers.add_parser(\n        \"queue\",\n        parents=[parent_parser],\n        description=append_doc_link(QUEUE_HELP, \"queue\"),\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n        help=QUEUE_HELP,\n    )\n\n    queue_subparsers = queue_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc queue CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    for cmd in SUB_COMMANDS:\n        cmd.add_parser(queue_subparsers, parent_parser)\n"
  },
  {
    "path": "dvc/commands/queue/kill.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdQueueKill(CmdBase):\n    \"\"\"Kill exp task in queue.\"\"\"\n\n    def run(self):\n        self.repo.experiments.celery_queue.kill(\n            revs=self.args.task, force=self.args.force\n        )\n\n        return 0\n\n\ndef add_parser(queue_subparsers, parent_parser):\n    QUEUE_KILL_HELP = (\n        \"Gracefully interrupt running experiment queue tasks (equivalent to Ctrl-C)\"\n    )\n    queue_kill_parser = queue_subparsers.add_parser(\n        \"kill\",\n        parents=[parent_parser],\n        description=append_doc_link(QUEUE_KILL_HELP, \"queue/kill\"),\n        help=QUEUE_KILL_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    queue_kill_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Forcefully and immediately kill running experiment queue tasks\",\n    )\n    queue_kill_parser.add_argument(\n        \"task\",\n        nargs=\"*\",\n        help=\"Tasks in queue to kill.\",\n        metavar=\"<task>\",\n    )\n    queue_kill_parser.set_defaults(func=CmdQueueKill)\n"
  },
  {
    "path": "dvc/commands/queue/logs.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdQueueLogs(CmdBase):\n    \"\"\"Show output logs for a queued experiment.\"\"\"\n\n    def run(self):\n        self.repo.experiments.celery_queue.logs(\n            rev=self.args.task,\n            encoding=self.args.encoding,\n            follow=self.args.follow,\n        )\n\n        return 0\n\n\ndef add_parser(queue_subparsers, parent_parser):\n    QUEUE_LOGS_HELP = (\n        \"Show output logs for running and completed experiment queue tasks.\"\n    )\n    queue_logs_parser = queue_subparsers.add_parser(\n        \"logs\",\n        parents=[parent_parser],\n        description=append_doc_link(QUEUE_LOGS_HELP, \"queue/logs\"),\n        help=QUEUE_LOGS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    queue_logs_parser.add_argument(\n        \"-e\",\n        \"--encoding\",\n        help=\"Text encoding for log output. Defaults to system locale encoding.\",\n        metavar=\"<encoding>\",\n    )\n    queue_logs_parser.add_argument(\n        \"-f\",\n        \"--follow\",\n        help=(\n            \"Attach to task and follow additional live output. Only \"\n            \"applicable if the task is still running.\"\n        ),\n        action=\"store_true\",\n    )\n    queue_logs_parser.add_argument(\"task\", help=\"Task to show.\", metavar=\"<task>\")\n    queue_logs_parser.set_defaults(func=CmdQueueLogs)\n"
  },
  {
    "path": "dvc/commands/queue/remove.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdQueueRemove(CmdBase):\n    \"\"\"Remove exp in queue.\"\"\"\n\n    def check_arguments(self):\n        clear_flag = any(\n            [\n                self.args.all,\n                self.args.queued,\n                self.args.failed,\n                self.args.success,\n            ]\n        )\n        if not (clear_flag ^ bool(self.args.task)):\n            raise InvalidArgumentError(\n                \"Either provide an `tasks` argument, or use the \"\n                \"`--all`, `--queued`, `--failed`, `--success` flag.\"\n            )\n\n    def run(self):\n        self.check_arguments()\n\n        if self.args.all:\n            self.args.queued = True\n            self.args.failed = True\n            self.args.success = True\n\n        if self.args.queued or self.args.failed or self.args.success:\n            removed_list = self.repo.experiments.celery_queue.clear(\n                success=self.args.success,\n                queued=self.args.queued,\n                failed=self.args.failed,\n            )\n        else:\n            removed_list = self.repo.experiments.celery_queue.remove(\n                revs=self.args.task,\n            )\n\n        if removed_list:\n            removed = \", \".join(removed_list)\n            ui.write(f\"Removed tasks in queue: {removed}\")\n        else:\n            ui.write(f\"No tasks found named {self.args.task}\")\n\n        return 0\n\n\ndef add_parser(queue_subparsers, parent_parser):\n    QUEUE_REMOVE_HELP = \"Remove queued and completed tasks from the queue.\"\n    queue_remove_parser = queue_subparsers.add_parser(\n        \"remove\",\n        parents=[parent_parser],\n        description=append_doc_link(QUEUE_REMOVE_HELP, 
\"queue/remove\"),\n        help=QUEUE_REMOVE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    queue_remove_parser.add_argument(\n        \"--all\",\n        action=\"store_true\",\n        help=\"Remove all queued and completed tasks from the queue.\",\n    )\n    queue_remove_parser.add_argument(\n        \"--queued\",\n        action=\"store_true\",\n        help=\"Remove all queued tasks from the queue.\",\n    )\n    queue_remove_parser.add_argument(\n        \"--success\",\n        action=\"store_true\",\n        help=\"Remove all successful tasks from the queue.\",\n    )\n    queue_remove_parser.add_argument(\n        \"--failed\",\n        action=\"store_true\",\n        help=\"Remove all failed tasks from the queue.\",\n    )\n    queue_remove_parser.add_argument(\n        \"task\",\n        nargs=\"*\",\n        help=\"Tasks to remove.\",\n        metavar=\"<task>\",\n    )\n    queue_remove_parser.set_defaults(func=CmdQueueRemove)\n"
  },
  {
    "path": "dvc/commands/queue/start.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdQueueStart(CmdBase):\n    \"\"\"Start exp queue workers.\"\"\"\n\n    def run(self):\n        started = self.repo.experiments.celery_queue.start_workers(self.args.jobs)\n\n        suffix = \"s\" if started > 1 else \"\"\n        ui.write(f\"Started '{started}' new experiments task queue worker{suffix}.\")\n\n        return 0\n\n\ndef add_parser(queue_subparsers, parent_parser):\n    QUEUE_START_HELP = \"Start the experiments task queue worker.\"\n    queue_start_parser = queue_subparsers.add_parser(\n        \"start\",\n        parents=[parent_parser],\n        description=append_doc_link(QUEUE_START_HELP, \"queue/start\"),\n        help=QUEUE_START_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    queue_start_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        default=1,\n        help=\"Maximum number of concurrent queue workers to start. Defaults to 1.\",\n        metavar=\"<number>\",\n    )\n    queue_start_parser.set_defaults(func=CmdQueueStart)\n"
  },
  {
    "path": "dvc/commands/queue/status.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.compare import TabularData\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdQueueStatus(CmdBase):\n    \"\"\"Show queue task and worker status.\"\"\"\n\n    def run(self) -> int:\n        from dvc.repo.experiments.show import format_time\n\n        result = self.repo.experiments.celery_queue.status()\n        if result:\n            all_headers = [\"Task\", \"Name\", \"Created\", \"Status\"]\n            td = TabularData(all_headers)\n            for exp in result:\n                created = format_time(exp.get(\"timestamp\"))\n                assert exp[\"rev\"]\n                assert exp[\"status\"]\n                td.append(\n                    [\n                        exp[\"rev\"][:7],\n                        exp.get(\"name\") or \"\",\n                        created,\n                        exp[\"status\"],\n                    ]\n                )\n            td.render()\n        else:\n            ui.write(\"No experiment tasks in the queue.\")\n        ui.write()\n\n        worker_status = self.repo.experiments.celery_queue.worker_status()\n        active_count = len([name for name, task in worker_status.items() if task])\n        idle_count = len(worker_status) - active_count\n\n        ui.write(f\"Worker status: {active_count} active, {idle_count} idle\")\n\n        return 0\n\n\ndef add_parser(queue_subparsers, parent_parser):\n    QUEUE_STATUS_HELP = \"Show the status of experiments queue tasks and workers.\"\n    queue_status_parser = queue_subparsers.add_parser(\n        \"status\",\n        parents=[parent_parser],\n        description=append_doc_link(QUEUE_STATUS_HELP, \"queue/status\"),\n        help=QUEUE_STATUS_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    queue_status_parser.set_defaults(func=CmdQueueStatus)\n"
  },
  {
    "path": "dvc/commands/queue/stop.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdQueueStop(CmdBase):\n    \"\"\"Stop exp queue workers.\"\"\"\n\n    def run(self):\n        self.repo.experiments.celery_queue.shutdown(kill=self.args.kill)\n\n        if self.args.kill:\n            ui.write(\n                \"All running tasks in the queue have been killed. \"\n                \"Queue workers are stopping.\"\n            )\n        else:\n            ui.write(\"Queue workers will stop after running tasks finish.\")\n\n        return 0\n\n\ndef add_parser(queue_subparsers, parent_parser):\n    QUEUE_STOP_HELP = \"Stop all experiments task queue workers.\"\n    queue_stop_parser = queue_subparsers.add_parser(\n        \"stop\",\n        parents=[parent_parser],\n        description=append_doc_link(QUEUE_STOP_HELP, \"queue/stop\"),\n        help=QUEUE_STOP_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    queue_stop_parser.add_argument(\n        \"--kill\",\n        action=\"store_true\",\n        help=\"Kill all running tasks before stopping the queue workers.\",\n    )\n    queue_stop_parser.set_defaults(func=CmdQueueStop)\n"
  },
  {
    "path": "dvc/commands/remote.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.config import CmdConfig\nfrom dvc.ui import ui\nfrom dvc.utils import format_link\n\n\nclass CmdRemote(CmdConfig):\n    def __init__(self, args):\n        super().__init__(args)\n\n        if getattr(self.args, \"name\", None):\n            self.args.name = self.args.name.lower()\n\n    def _check_exists(self, conf):\n        from dvc.config import ConfigError\n\n        if self.args.name not in conf[\"remote\"]:\n            raise ConfigError(f\"remote '{self.args.name}' doesn't exist.\")\n\n\nclass CmdRemoteAdd(CmdRemote):\n    def run(self):\n        from dvc.config import ConfigError\n\n        if self.args.default:\n            ui.write(f\"Setting '{self.args.name}' as a default remote.\")\n\n        with self.config.edit(self.args.level) as conf:\n            if self.args.name in conf[\"remote\"] and not self.args.force:\n                raise ConfigError(\n                    f\"remote '{self.args.name}' already exists. 
Use `-f|--force` to \"\n                    \"overwrite it.\"\n                )\n\n            conf[\"remote\"][self.args.name] = {\"url\": self.args.url}\n            if self.args.default:\n                conf[\"core\"][\"remote\"] = self.args.name\n\n        return 0\n\n\nclass CmdRemoteRemove(CmdRemote):\n    def run(self):\n        with self.config.edit(self.args.level) as conf:\n            self._check_exists(conf)\n            del conf[\"remote\"][self.args.name]\n\n        up_to_level = self.args.level or \"repo\"\n        # Remove core.remote refs to this remote in any shadowing configs\n        for level in reversed(self.config.LEVELS):\n            with self.config.edit(level) as conf:\n                if conf[\"core\"].get(\"remote\") == self.args.name:\n                    del conf[\"core\"][\"remote\"]\n\n            if level == up_to_level:\n                break\n\n        return 0\n\n\nclass CmdRemoteModify(CmdRemote):\n    def run(self):\n        from dvc.config import merge\n\n        with self.config.edit(self.args.level) as conf:\n            merged = self.config.load_config_to_level(self.args.level)\n            merge(merged, conf)\n            self._check_exists(merged)\n\n            if self.args.name not in conf[\"remote\"]:\n                conf[\"remote\"][self.args.name] = {}\n            section = conf[\"remote\"][self.args.name]\n            if self.args.unset:\n                section.pop(self.args.option, None)\n            else:\n                section[self.args.option] = self.args.value\n        return 0\n\n\nclass CmdRemoteDefault(CmdRemote):\n    def run(self):\n        from dvc.config import ConfigError\n\n        if self.args.name is None and not self.args.unset:\n            conf = self.config.read(self.args.level)\n            try:\n                ui.write(conf[\"core\"][\"remote\"])\n            except KeyError:\n                ui.write(\"No default remote set\")\n                return 1\n        else:\n            with 
self.config.edit(self.args.level) as conf:\n                if self.args.unset:\n                    conf[\"core\"].pop(\"remote\", None)\n                else:\n                    merged_conf = self.config.load_config_to_level(self.args.level)\n                    if (\n                        self.args.name in conf[\"remote\"]\n                        or self.args.name in merged_conf[\"remote\"]\n                    ):\n                        conf[\"core\"][\"remote\"] = self.args.name\n                    else:\n                        raise ConfigError(\n                            \"default remote must be present in remote list.\"\n                        )\n        return 0\n\n\nclass CmdRemoteList(CmdRemote):\n    def run(self):\n        conf = self.config.read(self.args.level)\n        default_remote = conf[\"core\"].get(\"remote\")\n\n        for name, remote_conf in conf[\"remote\"].items():\n            if name == default_remote:\n                text = f\"{name}\\t{remote_conf['url']}\\t(default)\"\n                color = \"green\"\n            else:\n                text = f\"{name}\\t{remote_conf['url']}\"\n                color = \"\"\n            ui.write(ui.rich_text(text, style=color), styled=True)\n        return 0\n\n\nclass CmdRemoteRename(CmdRemote):\n    def _rename_default(self, conf):\n        if conf[\"core\"].get(\"remote\") == self.args.name:\n            conf[\"core\"][\"remote\"] = self.args.new\n\n    def run(self):\n        from dvc.config import ConfigError\n\n        all_config = self.config.load_config_to_level(None)\n        if self.args.new in all_config.get(\"remote\", {}):\n            raise ConfigError(\n                f\"Rename failed. 
Remote name {self.args.new!r} already exists.\"\n            )\n\n        with self.config.edit(self.args.level) as conf:\n            self._check_exists(conf)\n            conf[\"remote\"][self.args.new] = conf[\"remote\"][self.args.name]\n            del conf[\"remote\"][self.args.name]\n            self._rename_default(conf)\n\n        up_to_level = self.args.level or \"repo\"\n        for level in reversed(self.config.LEVELS):\n            if level == up_to_level:\n                break\n            with self.config.edit(level) as level_conf:\n                self._rename_default(level_conf)\n\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    from dvc.commands.config import parent_config_parser\n\n    REMOTE_HELP = \"Set up and manage data remotes.\"\n    remote_parser = subparsers.add_parser(\n        \"remote\",\n        parents=[parent_parser],\n        description=append_doc_link(REMOTE_HELP, \"remote\"),\n        help=REMOTE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    remote_subparsers = remote_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc remote CMD --help` for command-specific help.\",\n        required=True,\n    )\n\n    REMOTE_ADD_HELP = \"Add a new data remote.\"\n    remote_add_parser = remote_subparsers.add_parser(\n        \"add\",\n        parents=[parent_config_parser, parent_parser],\n        description=append_doc_link(REMOTE_ADD_HELP, \"remote/add\"),\n        help=REMOTE_ADD_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remote_add_parser.add_argument(\"name\", help=\"Name of the remote\")\n    remote_add_parser.add_argument(\n        \"url\",\n        help=\"Remote location. 
See full list of supported URLs at {}\".format(\n            format_link(\"https://man.dvc.org/remote\")\n        ),\n    )\n    remote_add_parser.add_argument(\n        \"-d\",\n        \"--default\",\n        action=\"store_true\",\n        default=False,\n        help=\"Set as default remote.\",\n    )\n    remote_add_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Force overwriting existing configs\",\n    )\n    remote_add_parser.set_defaults(func=CmdRemoteAdd)\n\n    REMOTE_DEFAULT_HELP = \"Set/unset the default data remote.\"\n    remote_default_parser = remote_subparsers.add_parser(\n        \"default\",\n        parents=[parent_config_parser, parent_parser],\n        description=append_doc_link(REMOTE_DEFAULT_HELP, \"remote/default\"),\n        help=REMOTE_DEFAULT_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remote_default_parser.add_argument(\n        \"name\", nargs=\"?\", help=\"Name of the remote\"\n    ).complete = completion.REMOTE\n    remote_default_parser.add_argument(\n        \"-u\",\n        \"--unset\",\n        action=\"store_true\",\n        default=False,\n        help=\"Unset default remote.\",\n    )\n    remote_default_parser.set_defaults(func=CmdRemoteDefault)\n\n    REMOTE_MODIFY_HELP = \"Modify the configuration of a data remote.\"\n    remote_modify_parser = remote_subparsers.add_parser(\n        \"modify\",\n        parents=[parent_config_parser, parent_parser],\n        description=append_doc_link(REMOTE_MODIFY_HELP, \"remote/modify\"),\n        help=REMOTE_MODIFY_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remote_modify_parser.add_argument(\n        \"name\", help=\"Name of the remote\"\n    ).complete = completion.REMOTE\n    remote_modify_parser.add_argument(\"option\", help=\"Name of the option to modify.\")\n    remote_modify_parser.add_argument(\n        \"value\", 
nargs=\"?\", help=\"(optional) Value of the option.\"\n    )\n    remote_modify_parser.add_argument(\n        \"-u\",\n        \"--unset\",\n        default=False,\n        action=\"store_true\",\n        help=\"Unset option.\",\n    )\n    remote_modify_parser.set_defaults(func=CmdRemoteModify)\n\n    REMOTE_LIST_HELP = \"List all available data remotes.\"\n    remote_list_parser = remote_subparsers.add_parser(\n        \"list\",\n        parents=[parent_config_parser, parent_parser],\n        description=append_doc_link(REMOTE_LIST_HELP, \"remote/list\"),\n        help=REMOTE_LIST_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remote_list_parser.set_defaults(func=CmdRemoteList)\n\n    REMOTE_REMOVE_HELP = \"Remove a data remote.\"\n    remote_remove_parser = remote_subparsers.add_parser(\n        \"remove\",\n        parents=[parent_config_parser, parent_parser],\n        description=append_doc_link(REMOTE_REMOVE_HELP, \"remote/remove\"),\n        help=REMOTE_REMOVE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remote_remove_parser.add_argument(\n        \"name\", help=\"Name of the remote to remove.\"\n    ).complete = completion.REMOTE\n    remote_remove_parser.set_defaults(func=CmdRemoteRemove)\n    REMOTE_RENAME_HELP = \"Rename a DVC remote\"\n    remote_rename_parser = remote_subparsers.add_parser(\n        \"rename\",\n        parents=[parent_config_parser, parent_parser],\n        description=append_doc_link(REMOTE_RENAME_HELP, \"remote/rename\"),\n        help=REMOTE_RENAME_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remote_rename_parser.add_argument(\"name\", help=\"Remote to be renamed\")\n    remote_rename_parser.add_argument(\"new\", help=\"New name of the remote\")\n    remote_rename_parser.set_defaults(func=CmdRemoteRename)\n"
  },
  {
    "path": "dvc/commands/remove.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdRemove(CmdBase):\n    def run(self):\n        for target in self.args.targets:\n            try:\n                self.repo.remove(target, outs=self.args.outs)\n            except DvcException:\n                logger.exception(\"\")\n                return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    REMOVE_HELP = (\n        \"Remove stages from dvc.yaml and/or stop tracking files or directories.\"\n    )\n    remove_parser = subparsers.add_parser(\n        \"remove\",\n        aliases=[\"rm\"],\n        parents=[parent_parser],\n        description=append_doc_link(REMOVE_HELP, \"remove\"),\n        help=REMOVE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    remove_parser.add_argument(\n        \"--outs\",\n        action=\"store_true\",\n        default=False,\n        help=\"Remove outputs as well.\",\n    )\n    remove_parser.add_argument(\n        \"targets\",\n        nargs=\"+\",\n        help=\".dvc files or stages from dvc.yaml to remove.\",\n    ).complete = completion.DVC_FILE\n    remove_parser.set_defaults(func=CmdRemove)\n"
  },
  {
    "path": "dvc/commands/repro.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.status import CmdDataStatus\n\n\nclass CmdRepro(CmdBase):\n    def run(self):\n        from dvc.ui import ui\n\n        stages = self.repo.reproduce(**self._common_kwargs, **self._repro_kwargs)\n        if len(stages) == 0:\n            ui.write(CmdDataStatus.UP_TO_DATE_MSG)\n        else:\n            ui.write(\"Use `dvc push` to send your updates to remote storage.\")\n\n        return 0\n\n    @property\n    def _common_kwargs(self):\n        return {\n            \"targets\": self.args.targets,\n            \"single_item\": self.args.single_item,\n            \"force\": self.args.force,\n            \"dry\": self.args.dry,\n            \"interactive\": self.args.interactive,\n            \"pipeline\": self.args.pipeline,\n            \"all_pipelines\": self.args.all_pipelines,\n            \"downstream\": self.args.downstream,\n            \"recursive\": self.args.recursive,\n            \"force_downstream\": self.args.force_downstream,\n            \"pull\": self.args.pull,\n            \"allow_missing\": self.args.allow_missing,\n            \"on_error\": self.args.on_error,\n        }\n\n    @property\n    def _repro_kwargs(self):\n        return {\n            \"run_cache\": not self.args.no_run_cache,\n            \"no_commit\": self.args.no_commit,\n            \"glob\": self.args.glob,\n        }\n\n\ndef add_arguments(repro_parser):\n    repro_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n        help=\"\"\"\\\nStages to reproduce. 'dvc.yaml' by default.\nThe targets can be path to a dvc.yaml file or `.dvc` file,\nor a stage name from dvc.yaml file from\ncurrent working directory. 
To run a stage from dvc.yaml\nfrom other directories, the target must be a path followed by colon `:`\nand then the stage name.\n\"\"\",\n    ).complete = completion.DVCFILES_AND_STAGE\n    repro_parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Reproduce even if dependencies were not changed.\",\n    )\n    repro_parser.add_argument(\n        \"-i\",\n        \"--interactive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Ask for confirmation before reproducing each stage.\",\n    )\n    repro_parser.add_argument(\n        \"-s\",\n        \"--single-item\",\n        action=\"store_true\",\n        default=False,\n        help=\"Reproduce only single data item without recursive dependencies check.\",\n    )\n    repro_parser.add_argument(\n        \"-p\",\n        \"--pipeline\",\n        action=\"store_true\",\n        default=False,\n        help=\"Reproduce the whole pipeline that the specified targets belong to.\",\n    )\n    repro_parser.add_argument(\n        \"-P\",\n        \"--all-pipelines\",\n        action=\"store_true\",\n        default=False,\n        help=\"Reproduce all pipelines in the repo.\",\n    )\n    repro_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Reproduce all stages in the specified directory.\",\n    )\n    repro_parser.add_argument(\n        \"--downstream\",\n        action=\"store_true\",\n        default=False,\n        help=\"Start from the specified stages when reproducing pipelines.\",\n    )\n    repro_parser.add_argument(\n        \"--force-downstream\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Reproduce all descendants of a changed stage even if their \"\n            \"direct dependencies didn't change.\"\n        ),\n    )\n    repro_parser.add_argument(\n        \"--pull\",\n 
       action=\"store_true\",\n        default=False,\n        help=\"Try automatically pulling missing data.\",\n    )\n    repro_parser.add_argument(\n        \"--allow-missing\",\n        action=\"store_true\",\n        default=False,\n        help=(\"Skip stages with missing data but no other changes.\"),\n    )\n    repro_parser.add_argument(\n        \"--dry\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Only print the commands that would be executed without actually executing.\"\n        ),\n    )\n    repro_parser.add_argument(\n        \"-k\",\n        \"--keep-going\",\n        action=\"store_const\",\n        default=\"fail\",\n        const=\"keep-going\",\n        dest=\"on_error\",\n        help=(\n            \"Continue executing, skipping stages having dependencies \"\n            \"on the failed stages\"\n        ),\n    )\n    repro_parser.add_argument(\n        \"--ignore-errors\",\n        action=\"store_const\",\n        default=\"fail\",\n        const=\"ignore\",\n        dest=\"on_error\",\n        help=\"Ignore errors from stages.\",\n    )\n\n\ndef add_parser(subparsers, parent_parser):\n    REPRO_HELP = \"Reproduce complete or partial pipelines by executing their stages.\"\n    repro_parser = subparsers.add_parser(\n        \"repro\",\n        parents=[parent_parser],\n        description=append_doc_link(REPRO_HELP, \"repro\"),\n        help=REPRO_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    # repro/exp run shared args\n    add_arguments(repro_parser)\n    # repro only args\n    repro_parser.add_argument(\n        \"--glob\",\n        action=\"store_true\",\n        default=False,\n        help=\"Allows targets containing shell-style wildcards.\",\n    )\n    repro_parser.add_argument(\n        \"--no-commit\",\n        action=\"store_true\",\n        default=False,\n        help=\"Don't put files/directories into cache.\",\n    )\n    
repro_parser.add_argument(\n        \"--no-run-cache\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Execute stage commands even if they have already been run with \"\n            \"the same command/dependencies/outputs/etc before.\"\n        ),\n    )\n    repro_parser.set_defaults(func=CmdRepro)\n"
  },
  {
    "path": "dvc/commands/root.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.utils import relpath\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdRoot(CmdBaseNoRepo):\n    def run(self):\n        from dvc.repo import Repo\n        from dvc.ui import ui\n\n        ui.write(relpath(Repo.find_root()))\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    ROOT_HELP = \"Return the relative path to the root of the DVC project.\"\n    root_parser = subparsers.add_parser(\n        \"root\",\n        parents=[parent_parser],\n        description=append_doc_link(ROOT_HELP, \"root\"),\n        help=ROOT_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    root_parser.set_defaults(func=CmdRoot)\n"
  },
  {
    "path": "dvc/commands/stage.py",
    "content": "import argparse\nimport logging\nfrom collections.abc import Iterable\nfrom contextlib import contextmanager\nfrom itertools import chain, filterfalse\nfrom typing import TYPE_CHECKING\n\nfrom dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.utils.cli_parse import parse_params\nfrom dvc.utils.humanize import truncate_text\n\nif TYPE_CHECKING:\n    from dvc.output import Output\n    from dvc.stage import Stage\n\nlogger = logger.getChild(__name__)\n\nMAX_TEXT_LENGTH = 80\n\n\ndef generate_description(stage: \"Stage\") -> str:\n    def part_desc(outs: Iterable[\"Output\"]) -> str:\n        return \", \".join(out.def_path for out in outs)\n\n    if not stage.deps and not stage.outs:\n        return \"No outputs or dependencies\"\n\n    if not stage.outs and stage.deps:\n        return \"Depends on \" + part_desc(stage.deps)\n\n    def is_plot_or_metric(out: \"Output\"):\n        return bool(out.plot) or bool(out.metric)\n\n    desc: list[str] = []\n\n    outs = list(filterfalse(is_plot_or_metric, stage.outs))\n    if outs:\n        desc.append(\"Outputs \" + part_desc(outs))\n\n    plots_and_metrics = list(filter(is_plot_or_metric, stage.outs))\n    if plots_and_metrics:\n        desc.append(\"Reports \" + part_desc(plots_and_metrics))\n\n    return \"; \".join(desc)\n\n\ndef prepare_description(stage: \"Stage\", max_length: int = MAX_TEXT_LENGTH) -> str:\n    desc = stage.short_description() or generate_description(stage)\n    return truncate_text(desc, max_length)\n\n\ndef prepare_stages_data(\n    stages: Iterable[\"Stage\"],\n    description: bool = True,\n    max_length: int = MAX_TEXT_LENGTH,\n) -> dict[str, str]:\n    return {\n        stage.addressing: (\n            prepare_description(stage, max_length=max_length) if description else \"\"\n        )\n        for stage in stages\n    }\n\n\nclass CmdStageList(CmdBase):\n    def 
_get_stages(self) -> Iterable[\"Stage\"]:\n        if self.args.all:\n            stages: list[Stage] = self.repo.index.stages\n            logger.trace(\"%d no. of stages found\", len(stages))\n            return stages\n\n        # removing duplicates while maintaining order\n        collected = chain.from_iterable(\n            self.repo.stage.collect(target=target, recursive=self.args.recursive)\n            for target in self.args.targets\n        )\n        return dict.fromkeys(collected).keys()\n\n    def run(self):\n        from dvc.ui import ui\n\n        def log_error(relpath: str, exc: Exception):\n            if self.args.fail:\n                raise exc\n            logger.debug(\"Stages from %s failed to load\", relpath)\n\n        # silence stage collection error by default\n        self.repo.stage_collection_error_handler = log_error\n\n        stages = self._get_stages()\n        data = prepare_stages_data(stages, description=not self.args.name_only)\n        ui.table(list(data.items()))\n\n        return 0\n\n\ndef parse_cmd(commands: list[str]) -> str:\n    \"\"\"\n    We need to take into account two cases:\n\n    - ['python code.py foo bar']: Used mainly with dvc as a library\n    - ['echo', 'foo bar']: List of arguments received from the CLI\n\n    The second case would need quoting, as it was passed through:\n            dvc run echo \"foo bar\"\n    \"\"\"\n\n    def quote_argument(arg: str):\n        if not arg:\n            return '\"\"'\n        if \" \" in arg and '\"' not in arg:\n            return f'\"{arg}\"'\n        return arg\n\n    if len(commands) < 2:\n        return \" \".join(commands)\n    return \" \".join(map(quote_argument, commands))\n\n\n@contextmanager\ndef _disable_logging(highest_level=logging.CRITICAL):\n    previous_level = logging.root.manager.disable\n\n    logging.disable(highest_level)\n\n    try:\n        yield\n    finally:\n        logging.disable(previous_level)\n\n\nclass CmdStageAdd(CmdBase):\n    def 
run(self):\n        from dvc.repo import lock_repo\n\n        kwargs = vars(self.args)\n        kwargs.update(\n            {\n                \"cmd\": parse_cmd(kwargs.pop(\"command\")),\n                \"params\": parse_params(self.args.params),\n            }\n        )\n\n        with self.repo.scm_context, lock_repo(self.repo):\n            with _disable_logging(logging.INFO):\n                stage = self.repo.stage.add(**kwargs)\n            logger.info(\"Added stage %r in %r\", stage.addressing, stage.relpath)\n            if self.args.run:\n                stage.run()\n                stage.dump(update_pipeline=False)\n\n        return 0\n\n\ndef _add_common_args(parser):\n    parser.add_argument(\n        \"-f\",\n        \"--force\",\n        action=\"store_true\",\n        default=False,\n        help=\"Overwrite existing stage\",\n    )\n    parser.add_argument(\n        \"-d\",\n        \"--deps\",\n        action=\"append\",\n        default=[],\n        help=\"Declare dependencies for reproducible cmd.\",\n        metavar=\"<path>\",\n    ).complete = completion.FILE\n    parser.add_argument(\n        \"-p\",\n        \"--params\",\n        action=\"append\",\n        default=[],\n        help=\"Declare parameter to use as additional dependency.\",\n        metavar=\"[<filename>:]<params_list>\",\n    ).complete = completion.FILE\n    parser.add_argument(\n        \"-o\",\n        \"--outs\",\n        action=\"append\",\n        default=[],\n        help=\"Declare output file or directory.\",\n        metavar=\"<filename>\",\n    ).complete = completion.FILE\n    parser.add_argument(\n        \"-O\",\n        \"--outs-no-cache\",\n        action=\"append\",\n        default=[],\n        help=\"Declare output file or directory (do not put into DVC cache).\",\n        metavar=\"<filename>\",\n    ).complete = completion.FILE\n    parser.add_argument(\n        \"--outs-persist\",\n        action=\"append\",\n        default=[],\n        help=\"Declare 
output file or directory that will not be removed upon repro.\",\n        metavar=\"<filename>\",\n    )\n    parser.add_argument(\n        \"--outs-persist-no-cache\",\n        action=\"append\",\n        default=[],\n        help=(\n            \"Declare output file or directory that will not be \"\n            \"removed upon repro (do not put into DVC cache).\"\n        ),\n        metavar=\"<filename>\",\n    )\n    parser.add_argument(\n        \"-m\",\n        \"--metrics\",\n        action=\"append\",\n        default=[],\n        help=\"Declare output metrics file.\",\n        metavar=\"<path>\",\n    )\n    parser.add_argument(\n        \"-M\",\n        \"--metrics-no-cache\",\n        action=\"append\",\n        default=[],\n        help=\"Declare output metrics file (do not put into DVC cache).\",\n        metavar=\"<path>\",\n    )\n    parser.add_argument(\n        \"--plots\",\n        action=\"append\",\n        default=[],\n        help=\"Declare output plot file.\",\n        metavar=\"<path>\",\n    )\n    parser.add_argument(\n        \"--plots-no-cache\",\n        action=\"append\",\n        default=[],\n        help=\"Declare output plot file (do not put into DVC cache).\",\n        metavar=\"<path>\",\n    )\n    parser.add_argument(\n        \"-w\",\n        \"--wdir\",\n        help=\"Directory within your repo to run your command in.\",\n        metavar=\"<path>\",\n    )\n    parser.add_argument(\n        \"--always-changed\",\n        action=\"store_true\",\n        default=False,\n        help=\"Always consider this DVC-file as changed.\",\n    )\n    parser.add_argument(\n        \"--desc\",\n        type=str,\n        metavar=\"<text>\",\n        help=(\n            \"User description of the stage (optional). 
\"\n            \"This doesn't affect any DVC operations.\"\n        ),\n    )\n    parser.add_argument(\n        \"--run\",\n        action=\"store_true\",\n        default=False,\n        help=\"Execute the stage after generating it.\",\n    )\n    parser.add_argument(\n        \"command\",\n        nargs=argparse.REMAINDER,\n        help=\"Command to execute.\",\n        metavar=\"command\",\n    )\n\n\ndef add_parser(subparsers, parent_parser):\n    STAGES_HELP = \"Commands to list and create stages.\"\n\n    stage_parser = subparsers.add_parser(\n        \"stage\",\n        parents=[parent_parser],\n        description=append_doc_link(STAGES_HELP, \"stage\"),\n        help=STAGES_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    stage_subparsers = stage_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `dvc stage CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    STAGE_ADD_HELP = \"Create stage\"\n    stage_add_parser = stage_subparsers.add_parser(\n        \"add\",\n        parents=[parent_parser],\n        description=append_doc_link(STAGE_ADD_HELP, \"stage/add\"),\n        help=STAGE_ADD_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    stage_add_parser.add_argument(\n        \"-n\", \"--name\", help=\"Name of the stage to add\", required=True\n    )\n    _add_common_args(stage_add_parser)\n    stage_add_parser.set_defaults(func=CmdStageAdd)\n\n    STAGE_LIST_HELP = \"List stages.\"\n    stage_list_parser = stage_subparsers.add_parser(\n        \"list\",\n        parents=[parent_parser],\n        description=append_doc_link(STAGE_LIST_HELP, \"stage/list\"),\n        help=STAGE_LIST_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    stage_list_parser.add_argument(\n        \"targets\",\n        nargs=\"*\",\n        default=[\"dvc.yaml\"],\n        help=(\n            \"Show stages from a dvc.yaml/.dvc file or a 
directory. \"\n            \"'dvc.yaml' by default\"\n        ),\n    )\n    stage_list_parser.add_argument(\n        \"--all\",\n        action=\"store_true\",\n        default=False,\n        help=\"List all of the stages in the repo.\",\n    )\n    stage_list_parser.add_argument(\n        \"--fail\",\n        action=\"store_true\",\n        default=False,\n        help=\"Fail immediately, do not suppress any syntax errors.\",\n    )\n    stage_list_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"List all stages inside the specified directory.\",\n    )\n    stage_list_parser.add_argument(\n        \"--name-only\",\n        \"--names-only\",\n        action=\"store_true\",\n        default=False,\n        help=\"List only stage names.\",\n    )\n    stage_list_parser.set_defaults(func=CmdStageList)\n"
  },
  {
    "path": "dvc/commands/status.py",
    "content": "from dvc.commands.data_sync import CmdDataBase\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc.utils import format_link\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdDataStatus(CmdDataBase):\n    STATUS_LEN = 20\n    STATUS_INDENT = \"\\t\"\n    UP_TO_DATE_MSG = \"Data and pipelines are up to date.\"\n    IN_SYNC_MSG = \"Cache and remote '{remote}' are in sync.\"\n    EMPTY_PROJECT_MSG = (\n        \"There are no data or pipelines tracked in this project yet.\\n\"\n        \"See {link} to get started!\"\n    ).format(link=format_link(\"https://dvc.org/doc/start\"))\n\n    def _normalize(self, s):\n        s += \":\"\n        assert len(s) < self.STATUS_LEN\n        return s + (self.STATUS_LEN - len(s)) * \" \"\n\n    def _show(self, status, indent=0):\n        ind = indent * self.STATUS_INDENT\n\n        if isinstance(status, str):\n            ui.write(f\"{ind}{status}\")\n            return\n\n        if isinstance(status, list):\n            for entry in status:\n                self._show(entry, indent)\n            return\n\n        assert isinstance(status, dict)\n\n        for key, value in status.items():\n            if isinstance(value, str):\n                ui.write(f\"{ind}{self._normalize(value)}{key}\")\n            elif value:\n                ui.write(f\"{ind}{key}:\")\n                self._show(value, indent + 1)\n\n    def run(self):\n        from dvc.repo import lock_repo\n\n        indent = 1 if self.args.cloud else 0\n\n        with lock_repo(self.repo):\n            try:\n                st = self.repo.status(\n                    targets=self.args.targets,\n                    jobs=self.args.jobs,\n                    cloud=self.args.cloud,\n                    remote=self.args.remote,\n                    all_branches=self.args.all_branches,\n                    all_tags=self.args.all_tags,\n                    all_commits=self.args.all_commits,\n                   
 with_deps=self.args.with_deps,\n                    recursive=self.args.recursive,\n                    check_updates=self.args.check_updates,\n                )\n            except DvcException:\n                logger.exception(\"\")\n                return 1\n\n            if self.args.json:\n                ui.write_json(st)\n                return 0\n\n            if self.args.quiet:\n                return int(bool(st))\n\n            if st:\n                self._show(st, indent)\n                return 0\n\n            # additional hints for the user\n            if not self.repo.index.stages:\n                ui.write(self.EMPTY_PROJECT_MSG)\n            elif self.args.cloud or self.args.remote:\n                remote = self.args.remote or self.repo.config[\"core\"].get(\"remote\")\n                ui.write(self.IN_SYNC_MSG.format(remote=remote))\n            else:\n                ui.write(self.UP_TO_DATE_MSG)\n\n        return 0\n"
  },
  {
    "path": "dvc/commands/studio.py",
    "content": "import os\n\nfrom funcy import get_in\n\nfrom dvc.cli import formatter\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.commands.config import CmdConfig\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdStudioLogin(CmdConfig):\n    def run(self):\n        from dvc.env import DVC_STUDIO_URL\n        from dvc.ui import ui\n        from dvc.utils.studio import STUDIO_URL\n        from dvc_studio_client.auth import StudioAuthError, get_access_token\n\n        studio = self.config.get(\"studio\", {})\n        name = self.args.name\n        hostname = (\n            self.args.hostname\n            or os.environ.get(DVC_STUDIO_URL)\n            or studio.get(\"url\")\n            or STUDIO_URL\n        )\n        scopes = self.args.scopes\n\n        if studio.get(\"url\", hostname) == hostname and \"token\" in studio:\n            raise DvcException(\n                \"Token already exists. \"\n                \"To login with a different token, \"\n                \"logout using 'dvc studio logout'.\"\n            )\n\n        open_browser = not self.args.no_open\n        try:\n            _, access_token = get_access_token(\n                token_name=name,\n                hostname=hostname,\n                scopes=scopes,\n                open_browser=open_browser,\n                client_name=\"DVC\",\n            )\n        except StudioAuthError as e:\n            ui.error_write(str(e))\n            return 1\n\n        self.save_config(hostname, access_token)\n\n        if not self.config[\"exp\"].get(\"auto_push\", True):\n            from dvc.ui import ui\n\n            ui.warn(\n                \"exp.auto_push is disabled. \\n\"\n                \"Enable with 'dvc config exp.auto_push true' \"\n                \"to automatically push experiments to Studio.\"\n            )\n\n        config_path = self.config.files[\"global\"]\n        ui.write(f\"Authentication complete. 
Saved token to {config_path}.\")\n        return 0\n\n    def save_config(self, hostname, token):\n        with self.config.edit(\"global\") as conf:\n            conf[\"studio\"][\"token\"] = token\n            conf[\"studio\"][\"url\"] = hostname\n            if \"auto_push\" not in conf[\"exp\"]:\n                conf[\"exp\"][\"auto_push\"] = True\n\n\nclass CmdStudioLogout(CmdConfig):\n    def run(self):\n        from dvc.ui import ui\n\n        with self.config.edit(\"global\") as conf:\n            if not get_in(conf, [\"studio\", \"token\"]):\n                ui.error_write(\n                    \"Not logged in to Studio. Log in with 'dvc studio login'.\"\n                )\n                return 1\n\n            del conf[\"studio\"][\"token\"]\n\n        ui.write(\"Logged out from Studio (you can log back in with 'dvc studio login')\")\n        return 0\n\n\nclass CmdStudioToken(CmdConfig):\n    def run(self):\n        from dvc.ui import ui\n\n        conf = self.config.read(\"global\")\n        token = get_in(conf, [\"studio\", \"token\"])\n        if not token:\n            ui.error_write(\"Not logged in to Studio.\")\n            return 1\n\n        ui.write(token)\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    STUDIO_HELP = \"Commands to authenticate DVC with Iterative Studio\"\n    STUDIO_DESCRIPTION = (\n        \"Authenticate DVC with Studio and set the token.\"\n        \" Once this token has been properly configured,\\n\"\n        \" DVC will utilize it for seamlessly sharing live experiments\\n\"\n        \" and sending notifications to Studio regarding any experiments\"\n        \" that have been pushed.\"\n    )\n\n    studio_parser = subparsers.add_parser(\n        \"studio\",\n        parents=[parent_parser],\n        description=append_doc_link(STUDIO_DESCRIPTION, \"studio\"),\n        help=STUDIO_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    studio_subparser = 
studio_parser.add_subparsers(\n        dest=\"cmd\",\n        help=\"Use `DVC studio CMD --help` to display command-specific help.\",\n        required=True,\n    )\n\n    STUDIO_LOGIN_HELP = \"Authenticate DVC with Studio host\"\n    STUDIO_LOGIN_DESCRIPTION = (\n        \"By default, this command authenticates the DVC with Studio\\n\"\n        \" using default scopes and assigns a random name as the token name.\"\n    )\n    login_parser = studio_subparser.add_parser(\n        \"login\",\n        parents=[parent_parser],\n        description=append_doc_link(STUDIO_LOGIN_DESCRIPTION, \"studio/login\"),\n        help=STUDIO_LOGIN_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    login_parser.add_argument(\n        \"-H\",\n        \"--hostname\",\n        action=\"store\",\n        default=None,\n        help=\"The hostname of the Studio instance to authenticate with.\",\n    )\n    login_parser.add_argument(\n        \"-s\",\n        \"--scopes\",\n        action=\"store\",\n        default=None,\n        help=\"The scopes for the authentication token. \",\n    )\n\n    login_parser.add_argument(\n        \"-n\",\n        \"--name\",\n        action=\"store\",\n        default=None,\n        help=\"The name of the authentication token. 
It will be used to\\n\"\n        \"identify token shown in Studio profile.\",\n    )\n\n    login_parser.add_argument(\n        \"--no-open\",\n        action=\"store_true\",\n        default=False,\n        help=\"Use authentication flow based on user code.\\n\"\n        \"You will be presented with user code to enter in browser.\\n\"\n        \"DVC will also use this if it cannot launch browser on your behalf.\",\n    )\n    login_parser.set_defaults(func=CmdStudioLogin)\n\n    STUDIO_LOGOUT_HELP = \"Logout user from Studio\"\n    STUDIO_LOGOUT_DESCRIPTION = (\n        \"This removes the studio token from your global config.\\n\"\n    )\n\n    logout_parser = studio_subparser.add_parser(\n        \"logout\",\n        parents=[parent_parser],\n        description=append_doc_link(STUDIO_LOGOUT_DESCRIPTION, \"studio/logout\"),\n        help=STUDIO_LOGOUT_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    logout_parser.set_defaults(func=CmdStudioLogout)\n\n    STUDIO_TOKEN_HELP = \"View the token dvc uses to contact Studio\"  # noqa: S105 # nosec B105\n\n    logout_parser = studio_subparser.add_parser(\n        \"token\",\n        parents=[parent_parser],\n        description=append_doc_link(STUDIO_TOKEN_HELP, \"studio/token\"),\n        help=STUDIO_TOKEN_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n\n    logout_parser.set_defaults(func=CmdStudioToken)\n"
  },
  {
    "path": "dvc/commands/unprotect.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdUnprotect(CmdBase):\n    def run(self):\n        for target in self.args.targets:\n            try:\n                self.repo.unprotect(target)\n            except DvcException:\n                msg = f\"failed to unprotect '{target}'\"\n                logger.exception(msg)\n                return 1\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    UNPROTECT_HELP = (\n        \"Unprotect tracked files or directories (when hardlinks or symlinks \"\n        \"have been enabled with `dvc config cache.type`).\"\n    )\n    unprotect_parser = subparsers.add_parser(\n        \"unprotect\",\n        parents=[parent_parser],\n        description=append_doc_link(UNPROTECT_HELP, \"unprotect\"),\n        help=UNPROTECT_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    unprotect_parser.add_argument(\n        \"targets\", nargs=\"+\", help=\"Data files/directories to unprotect.\"\n    ).complete = completion.FILE\n    unprotect_parser.set_defaults(func=CmdUnprotect)\n"
  },
  {
    "path": "dvc/commands/update.py",
    "content": "from dvc.cli import completion, formatter\nfrom dvc.cli.command import CmdBase\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdUpdate(CmdBase):\n    def run(self):\n        ret = 0\n        try:\n            self.repo.update(\n                targets=self.args.targets,\n                rev=self.args.rev,\n                recursive=self.args.recursive,\n                to_remote=self.args.to_remote,\n                no_download=self.args.no_download,\n                remote=self.args.remote,\n                jobs=self.args.jobs,\n            )\n        except DvcException:\n            logger.exception(\"failed update data\")\n            ret = 1\n        return ret\n\n\ndef add_parser(subparsers, parent_parser):\n    UPDATE_HELP = (\n        \"Update data artifact imported (via dvc import or dvc import-url) \"\n        \"from an external DVC repository or URL.\"\n    )\n    update_parser = subparsers.add_parser(\n        \"update\",\n        parents=[parent_parser],\n        description=append_doc_link(UPDATE_HELP, \"update\"),\n        help=UPDATE_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n    )\n    update_parser.add_argument(\n        \"targets\", nargs=\"+\", help=\".dvc files to update.\"\n    ).complete = completion.DVC_FILE\n    update_parser.add_argument(\n        \"--rev\",\n        nargs=\"?\",\n        help=\"Git revision (e.g. 
SHA, branch, tag)\",\n        metavar=\"<commit>\",\n    )\n    update_parser.add_argument(\n        \"-R\",\n        \"--recursive\",\n        action=\"store_true\",\n        default=False,\n        help=\"Update all stages in the specified directory.\",\n    )\n    update_parser.add_argument(\n        \"--no-download\",\n        action=\"store_true\",\n        default=False,\n        help=(\n            \"Update .dvc file git revision/hash value(s)\"\n            \" but do not download the file(s).\"\n        ),\n    )\n    update_parser.add_argument(\n        \"--to-remote\",\n        action=\"store_true\",\n        default=False,\n        help=\"Update data directly on the remote\",\n    )\n    update_parser.add_argument(\n        \"-r\",\n        \"--remote\",\n        help=\"Remote storage to perform updates to\",\n        metavar=\"<name>\",\n    ).complete = completion.REMOTE\n    update_parser.add_argument(\n        \"-j\",\n        \"--jobs\",\n        type=int,\n        help=(\n            \"Number of jobs to run simultaneously. \"\n            \"The default value is 4 * cpu_count(). \"\n        ),\n        metavar=\"<number>\",\n    )\n    update_parser.set_defaults(func=CmdUpdate)\n"
  },
  {
    "path": "dvc/commands/version.py",
    "content": "from dvc.cli import formatter\nfrom dvc.cli.command import CmdBaseNoRepo\nfrom dvc.cli.utils import append_doc_link\nfrom dvc.log import logger\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\nclass CmdVersion(CmdBaseNoRepo):\n    def run(self):\n        from dvc.info import get_dvc_info\n        from dvc.updater import notify_updates\n\n        dvc_info = get_dvc_info()\n        ui.write(dvc_info, force=True)\n\n        notify_updates()\n        return 0\n\n\ndef add_parser(subparsers, parent_parser):\n    VERSION_HELP = \"Display the DVC version and system/environment information.\"\n    version_parser = subparsers.add_parser(\n        \"version\",\n        parents=[parent_parser],\n        description=append_doc_link(VERSION_HELP, \"version\"),\n        help=VERSION_HELP,\n        formatter_class=formatter.RawDescriptionHelpFormatter,\n        aliases=[\"doctor\"],\n    )\n    version_parser.set_defaults(func=CmdVersion)\n"
  },
  {
    "path": "dvc/compare.py",
    "content": "from collections import abc\nfrom collections.abc import (\n    ItemsView,\n    Iterable,\n    Iterator,\n    Mapping,\n    MutableSequence,\n    Sequence,\n)\nfrom itertools import chain, repeat, zip_longest\nfrom operator import itemgetter\nfrom typing import TYPE_CHECKING, Any, Optional, Union, overload\n\nfrom funcy import reraise\n\nif TYPE_CHECKING:\n    from dvc.ui.table import CellT\n\n\nclass Column(list[\"CellT\"]):\n    pass\n\n\ndef with_value(value, default):\n    return default if value is None else value\n\n\nclass TabularData(MutableSequence[Sequence[\"CellT\"]]):\n    def __init__(self, columns: Sequence[str], fill_value: Optional[str] = \"\"):\n        self._columns: dict[str, Column] = {name: Column() for name in columns}\n        self._keys: list[str] = list(columns)\n        self._fill_value = fill_value\n        self._protected: set[str] = set()\n\n    @property\n    def columns(self) -> list[Column]:\n        return list(map(self.column, self.keys()))\n\n    def is_protected(self, col_name) -> bool:\n        return col_name in self._protected\n\n    def protect(self, *col_names: str):\n        self._protected.update(col_names)\n\n    def unprotect(self, *col_names: str):\n        self._protected = self._protected.difference(col_names)\n\n    def column(self, name: str) -> Column:\n        return self._columns[name]\n\n    def items(self) -> ItemsView[str, Column]:\n        projection = {k: self.column(k) for k in self.keys()}\n        return projection.items()\n\n    def keys(self) -> list[str]:\n        return self._keys\n\n    def _iter_col_row(self, row: Sequence[\"CellT\"]) -> Iterator[tuple[\"CellT\", Column]]:\n        for val, col in zip_longest(row, self.columns):\n            if col is None:\n                break\n            yield with_value(val, self._fill_value), col\n\n    def append(self, value: Sequence[\"CellT\"]) -> None:\n        for val, col in self._iter_col_row(value):\n            col.append(val)\n\n    
def extend(self, values: Iterable[Sequence[\"CellT\"]]) -> None:\n        for row in values:\n            self.append(row)\n\n    def insert(self, index: int, value: Sequence[\"CellT\"]) -> None:\n        for val, col in self._iter_col_row(value):\n            col.insert(index, val)\n\n    def __iter__(self) -> Iterator[list[\"CellT\"]]:\n        return map(list, zip(*self.columns))\n\n    def __getattr__(self, item: str) -> Column:\n        with reraise(KeyError, AttributeError):\n            return self.column(item)\n\n    def __getitem__(self, item: Union[int, slice]):\n        func = itemgetter(item)\n        it = map(func, self.columns)\n        if isinstance(item, slice):\n            it = map(list, zip(*it))\n        return list(it)\n\n    @overload\n    def __setitem__(self, item: int, value: Sequence[\"CellT\"]) -> None: ...\n\n    @overload\n    def __setitem__(self, item: slice, value: Iterable[Sequence[\"CellT\"]]) -> None: ...\n\n    def __setitem__(self, item, value) -> None:\n        it = value\n        if isinstance(item, slice):\n            n = len(self.columns)\n            normalized_rows = (\n                chain(val, repeat(self._fill_value, n - len(val))) for val in value\n            )\n            # we need to transpose those rows into columnar format\n            # as we work in terms of column-based arrays\n            it = zip(*normalized_rows)\n\n        for i, col in self._iter_col_row(it):\n            col[item] = i\n\n    def __delitem__(self, item: Union[int, slice]) -> None:\n        for col in self.columns:\n            del col[item]\n\n    def __len__(self) -> int:\n        if not self._columns:\n            return 0\n        return len(self.columns[0])\n\n    @property\n    def shape(self) -> tuple[int, int]:\n        return len(self.columns), len(self)\n\n    def drop(self, *col_names: str) -> None:\n        for col in col_names:\n            if not self.is_protected(col):\n                self._keys.remove(col)\n              
  self._columns.pop(col)\n\n    def rename(self, from_col_name: str, to_col_name: str) -> None:\n        self._columns[to_col_name] = self._columns.pop(from_col_name)\n        self._keys[self._keys.index(from_col_name)] = to_col_name\n\n    def project(self, *col_names: str) -> None:\n        self.drop(*(set(self._keys) - set(col_names)))\n        self._keys = list(col_names)\n\n    def is_empty(self, col_name: str) -> bool:\n        col = self.column(col_name)\n        return not any(item != self._fill_value for item in col)\n\n    def to_csv(self) -> str:\n        import csv\n        from io import StringIO\n\n        buff = StringIO()\n        writer = csv.writer(buff)\n        writer.writerow(self.keys())\n\n        for row in self:\n            writer.writerow(row)\n        return buff.getvalue()\n\n    def add_column(self, name: str) -> None:\n        self._columns[name] = Column([self._fill_value] * len(self))\n        self._keys.append(name)\n\n    def row_from_dict(self, d: Mapping[str, \"CellT\"]) -> None:\n        keys = self.keys()\n        for key in d:\n            if key not in keys:\n                self.add_column(key)\n\n        row: list[CellT] = [\n            with_value(d.get(key), self._fill_value) for key in self.keys()\n        ]\n        self.append(row)\n\n    def render(self, **kwargs: Any):\n        from dvc.ui import ui\n\n        if kwargs.pop(\"csv\", False):\n            ui.write(self.to_csv(), end=\"\")\n        else:\n            ui.table(self, headers=self.keys(), **kwargs)\n\n    def as_dict(\n        self, cols: Optional[Iterable[str]] = None\n    ) -> Iterable[dict[str, \"CellT\"]]:\n        keys = self.keys() if cols is None else set(cols)\n        return [{k: self._columns[k][i] for k in keys} for i in range(len(self))]\n\n    def dropna(  # noqa: C901, PLR0912\n        self,\n        axis: str = \"rows\",\n        how=\"any\",\n        subset: Optional[Iterable[str]] = None,\n    ):\n        if axis not in [\"rows\", 
\"cols\"]:\n            raise ValueError(\n                f\"Invalid 'axis' value {axis}.Choose one of ['rows', 'cols']\"\n            )\n        if how not in [\"any\", \"all\"]:\n            raise ValueError(f\"Invalid 'how' value {how}. Choose one of ['any', 'all']\")\n\n        match_line: set = set()\n        match_any = True\n        if how == \"all\":\n            match_any = False\n\n        for n_row, row in enumerate(self):\n            for n_col, col in enumerate(row):\n                if subset and self.keys()[n_col] not in subset:\n                    continue\n                if (col == self._fill_value) is match_any:\n                    if axis == \"rows\":\n                        match_line.add(n_row)\n                        break\n                    match_line.add(self.keys()[n_col])\n\n        to_drop = match_line\n        if how == \"all\":\n            if axis == \"rows\":\n                to_drop = set(range(len(self)))\n            else:\n                to_drop = set(self.keys())\n            to_drop -= match_line\n\n        if axis == \"rows\":\n            for name in self.keys():\n                self._columns[name] = Column(\n                    [x for n, x in enumerate(self._columns[name]) if n not in to_drop]\n                )\n        else:\n            self.drop(*to_drop)\n\n    def drop_duplicates(  # noqa: C901\n        self,\n        axis: str = \"rows\",\n        subset: Optional[Iterable[str]] = None,\n        ignore_empty: bool = True,\n    ):\n        if axis not in [\"rows\", \"cols\"]:\n            raise ValueError(\n                f\"Invalid 'axis' value {axis}.Choose one of ['rows', 'cols']\"\n            )\n\n        if axis == \"cols\":\n            cols_to_drop: list[str] = []\n            for n_col, col in enumerate(self.columns):\n                if subset and self.keys()[n_col] not in subset:\n                    continue\n                # Cast to str because Text is not hashable error\n                
unique_vals = {str(x) for x in col}\n                if ignore_empty and self._fill_value in unique_vals:\n                    unique_vals -= {self._fill_value}\n                if len(unique_vals) == 1:\n                    cols_to_drop.append(self.keys()[n_col])\n            self.drop(*cols_to_drop)\n\n        elif axis == \"rows\":\n            unique_rows = []\n            rows_to_drop: list[int] = []\n            for n_row, row in enumerate(self):\n                if subset:\n                    row = [\n                        col\n                        for n_col, col in enumerate(row)\n                        if self.keys()[n_col] in subset\n                    ]\n\n                tuple_row = tuple(row)\n                if tuple_row in unique_rows:\n                    rows_to_drop.append(n_row)\n                else:\n                    unique_rows.append(tuple_row)\n\n            for name in self.keys():\n                self._columns[name] = Column(\n                    [\n                        x\n                        for n, x in enumerate(self._columns[name])\n                        if n not in rows_to_drop\n                    ]\n                )\n\n\ndef _normalize_float(val: float, precision: int):\n    return f\"{val:.{precision}g}\"\n\n\ndef _format_field(\n    val: Any, precision: Optional[int] = None, round_digits: bool = False\n) -> str:\n    def _format(_val):\n        if isinstance(_val, float) and precision:\n            if round_digits:\n                return round(_val, precision)\n            return _normalize_float(_val, precision)\n        if isinstance(_val, abc.Mapping):\n            return {k: _format(v) for k, v in _val.items()}\n        if isinstance(_val, list):\n            return [_format(x) for x in _val]\n        return _val\n\n    return str(_format(val))\n\n\ndef diff_table(\n    diff,\n    title: str,\n    old: bool = True,\n    no_path: bool = False,\n    show_changes: bool = True,\n    precision: Optional[int] = 
None,\n    round_digits: bool = False,\n    on_empty_diff: Optional[str] = None,\n    a_rev: Optional[str] = None,\n    b_rev: Optional[str] = None,\n) -> TabularData:\n    a_rev = a_rev or \"HEAD\"\n    b_rev = b_rev or \"workspace\"\n    headers: list[str] = [\"Path\", title, a_rev, b_rev, \"Change\"]\n    fill_value = \"-\"\n    td = TabularData(headers, fill_value=fill_value)\n\n    for fname, diff_in_file in diff.items():\n        for item, change in sorted(diff_in_file.items()):\n            old_value = with_value(change.get(\"old\"), fill_value)\n            new_value = with_value(change.get(\"new\"), fill_value)\n            diff_value = with_value(change.get(\"diff\", on_empty_diff), fill_value)\n            td.append(\n                [\n                    fname,\n                    str(item),\n                    _format_field(old_value, precision, round_digits),\n                    _format_field(new_value, precision, round_digits),\n                    _format_field(diff_value, precision, round_digits),\n                ]\n            )\n\n    if no_path:\n        td.drop(\"Path\")\n\n    if not show_changes:\n        td.drop(\"Change\")\n\n    if not old:\n        td.drop(a_rev)\n        td.rename(b_rev, \"Value\")\n\n    return td\n\n\ndef show_diff(  # noqa: PLR0913\n    diff,\n    title: str,\n    old: bool = True,\n    no_path: bool = False,\n    show_changes: bool = True,\n    precision: Optional[int] = None,\n    round_digits: bool = False,\n    on_empty_diff: Optional[str] = None,\n    markdown: bool = False,\n    a_rev: Optional[str] = None,\n    b_rev: Optional[str] = None,\n) -> None:\n    td = diff_table(\n        diff,\n        title=title,\n        old=old,\n        no_path=no_path,\n        show_changes=show_changes,\n        precision=precision,\n        round_digits=round_digits,\n        on_empty_diff=on_empty_diff,\n        a_rev=a_rev,\n        b_rev=b_rev,\n    )\n    td.render(markdown=markdown)\n\n\ndef metrics_table(\n    
metrics,\n    all_branches: bool = False,\n    all_tags: bool = False,\n    all_commits: bool = False,\n    precision: Optional[int] = None,\n    round_digits: bool = False,\n):\n    from dvc.utils.diff import format_dict\n    from dvc.utils.flatten import flatten\n\n    td = TabularData([\"Revision\", \"Path\"], fill_value=\"-\")\n\n    for branch, val in metrics.items():\n        for fname, metric in val.get(\"data\", {}).items():\n            row_data: dict[str, str] = {\"Revision\": branch, \"Path\": fname}\n            metric = metric.get(\"data\", {})\n            flattened = (\n                flatten(format_dict(metric))\n                if isinstance(metric, dict)\n                else {\"\": metric}\n            )\n            row_data.update(\n                {\n                    k: _format_field(v, precision, round_digits)\n                    for k, v in flattened.items()\n                }\n            )\n            td.row_from_dict(row_data)\n\n    rev, path, *metrics_headers = td.keys()\n    td.project(rev, path, *sorted(metrics_headers))\n\n    if not any([all_branches, all_tags, all_commits]):\n        td.drop(\"Revision\")\n\n    return td\n\n\ndef show_metrics(\n    metrics,\n    markdown: bool = False,\n    all_branches: bool = False,\n    all_tags: bool = False,\n    all_commits: bool = False,\n    precision: Optional[int] = None,\n    round_digits: bool = False,\n) -> None:\n    td = metrics_table(\n        metrics,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n        precision=precision,\n        round_digits=round_digits,\n    )\n    td.render(markdown=markdown)\n"
  },
  {
    "path": "dvc/config.py",
    "content": "\"\"\"DVC config objects.\"\"\"\n\nimport ntpath\nimport os\nimport posixpath\nimport re\nfrom contextlib import contextmanager\nfrom functools import partial\nfrom typing import TYPE_CHECKING, Optional\n\nfrom funcy import compact, memoize, re_find\n\nfrom dvc.exceptions import DvcException, NotDvcRepoError\nfrom dvc.log import logger\n\nfrom .utils.objects import cached_property\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n    from dvc.types import DictStrAny\n\nlogger = logger.getChild(__name__)\n\n\nclass ConfigError(DvcException):\n    \"\"\"DVC config exception.\"\"\"\n\n    def __init__(self, msg):\n        super().__init__(f\"config file error: {msg}\")\n\n\nclass RemoteConfigError(ConfigError):\n    pass\n\n\nclass NoRemoteError(RemoteConfigError):\n    pass\n\n\nclass RemoteNotFoundError(RemoteConfigError):\n    pass\n\n\n@memoize\ndef get_compiled_schema():\n    from voluptuous import Schema\n\n    from .config_schema import SCHEMA\n\n    return Schema(SCHEMA)\n\n\ndef to_bool(value):\n    from .config_schema import Bool\n\n    return Bool(value)\n\n\nclass Config(dict):\n    \"\"\"Class that manages configuration files for a DVC repo.\n\n    Args:\n        dvc_dir (str): optional path to `.dvc` directory, that is used to\n            access repo-specific configs like .dvc/config and\n            .dvc/config.local.\n        validate (bool): optional flag to tell dvc if it should validate the\n            config or just load it as is. 
'True' by default.\n\n    Raises:\n        ConfigError: thrown if config has an invalid format.\n    \"\"\"\n\n    SYSTEM_LEVELS = (\"system\", \"global\")\n    REPO_LEVELS = (\"repo\", \"local\")\n    # In the order they shadow each other\n    LEVELS = SYSTEM_LEVELS + REPO_LEVELS\n\n    CONFIG = \"config\"\n    CONFIG_LOCAL = \"config.local\"\n\n    def __init__(\n        self,\n        dvc_dir: Optional[str] = None,\n        local_dvc_dir: Optional[str] = None,\n        validate: bool = True,\n        fs: Optional[\"FileSystem\"] = None,\n        config: Optional[\"DictStrAny\"] = None,\n        remote: Optional[str] = None,\n        remote_config: Optional[\"DictStrAny\"] = None,\n    ):\n        from dvc.fs import LocalFileSystem\n\n        dvc_dir = os.fspath(dvc_dir) if dvc_dir else None\n        self.dvc_dir = dvc_dir\n        self.wfs = LocalFileSystem()\n        self.fs = fs or self.wfs\n\n        if dvc_dir:\n            self.dvc_dir = self.fs.abspath(dvc_dir)\n\n        self.local_dvc_dir = local_dvc_dir\n        if not fs and not local_dvc_dir:\n            self.local_dvc_dir = dvc_dir\n\n        self.load(\n            validate=validate, config=config, remote=remote, remote_config=remote_config\n        )\n\n    @classmethod\n    def from_cwd(cls, fs: Optional[\"FileSystem\"] = None, **kwargs):\n        from dvc.repo import Repo\n\n        try:\n            dvc_dir = Repo.find_dvc_dir(fs=fs)\n        except NotDvcRepoError:\n            dvc_dir = None\n\n        return cls(dvc_dir=dvc_dir, fs=fs, **kwargs)\n\n    @classmethod\n    def get_dir(cls, level):\n        from dvc.dirs import global_config_dir, system_config_dir\n\n        assert level in (\"global\", \"system\")\n\n        if level == \"global\":\n            return global_config_dir()\n        if level == \"system\":\n            return system_config_dir()\n        return None\n\n    @cached_property\n    def files(self) -> dict[str, str]:\n        files = {\n            level: 
os.path.join(self.get_dir(level), self.CONFIG)\n            for level in (\"system\", \"global\")\n        }\n\n        if self.dvc_dir is not None:\n            files[\"repo\"] = self.fs.join(self.dvc_dir, self.CONFIG)\n\n        if self.local_dvc_dir is not None:\n            files[\"local\"] = self.wfs.join(self.local_dvc_dir, self.CONFIG_LOCAL)\n\n        return files\n\n    @staticmethod\n    def init(dvc_dir):\n        \"\"\"Initializes dvc config.\n\n        Args:\n            dvc_dir (str): path to .dvc directory.\n\n        Returns:\n            dvc.config.Config: config object.\n        \"\"\"\n        config_file = os.path.join(dvc_dir, Config.CONFIG)\n        with open(config_file, \"w+\", encoding=\"utf-8\"):\n            return Config(dvc_dir)\n\n    def merge(self, config):\n        merge(self, config)\n\n    def load(\n        self,\n        validate: bool = True,\n        config: Optional[\"DictStrAny\"] = None,\n        remote: Optional[str] = None,\n        remote_config: Optional[\"DictStrAny\"] = None,\n    ):\n        \"\"\"Loads config from all the config files.\n\n        Raises:\n            ConfigError: thrown if config has an invalid format.\n        \"\"\"\n        conf = self.load_config_to_level()\n\n        if config is not None:\n            merge(conf, config)\n\n        if validate:\n            conf = self.validate(conf)\n\n        self.clear()\n\n        if remote:\n            conf[\"core\"][\"remote\"] = remote\n\n        if remote_config:\n            remote = remote or conf[\"core\"].get(\"remote\")\n            if not remote:\n                raise ValueError(\"Missing remote name\")\n\n            merge(conf, {\"remote\": {remote: remote_config}})\n\n        self.update(conf)\n\n    def _get_fs(self, level):\n        # NOTE: this might be a Gitfs, which doesn't see things outside of\n        # the repo.\n        return self.fs if level == \"repo\" else self.wfs\n\n    @staticmethod\n    def load_file(path, fs=None) -> 
dict:\n        from configobj import ConfigObj, ConfigObjError\n\n        from dvc.fs import localfs\n\n        fs = fs or localfs\n\n        with fs.open(path) as fobj:\n            try:\n                conf_obj = ConfigObj(fobj)\n            except UnicodeDecodeError as exc:\n                raise ConfigError(str(exc)) from exc\n            except ConfigObjError as exc:\n                raise ConfigError(str(exc)) from exc\n\n        return _parse_named(_lower_keys(conf_obj.dict()))\n\n    def _load_config(self, level):\n        filename = self.files[level]\n        fs = self._get_fs(level)\n\n        try:\n            return self.load_file(filename, fs=fs)\n        except FileNotFoundError:\n            return {}\n\n    def _save_config(self, level, conf_dict):\n        from configobj import ConfigObj\n\n        filename = self.files[level]\n        fs = self._get_fs(level)\n\n        logger.debug(\"Writing '%s'.\", filename)\n\n        fs.makedirs(os.path.dirname(filename))\n\n        config = ConfigObj(_pack_named(conf_dict))\n        with fs.open(filename, \"wb\") as fobj:\n            config.write(fobj)\n        config.filename = filename\n\n    def load_one(self, level):\n        conf = self._load_config(level)\n        conf = self._load_paths(conf, self.files[level])\n\n        # Auto-verify sections\n        for key in get_compiled_schema().schema:\n            conf.setdefault(key, {})\n\n        return conf\n\n    @staticmethod\n    def _resolve(conf_dir, path):\n        from .config_schema import ExpPath, RelPath\n\n        if re.match(r\"\\w+://\", path):\n            return path\n\n        if os.name == \"nt\" and posixpath.isabs(path) and ntpath.sep not in path:\n            return path\n\n        if os.path.isabs(path):\n            return path\n\n        # on windows convert slashes to backslashes\n        # to have path compatible with abs_conf_dir\n        if os.path.sep == \"\\\\\" and \"/\" in path:\n            if path.startswith(\"/\"):\n    
            path = path.replace(\"/\", \"\\\\\\\\\", 1)\n            path = path.replace(\"/\", \"\\\\\")\n\n        expanded = os.path.expanduser(path)\n        if os.path.isabs(expanded):\n            return ExpPath(expanded, path)\n\n        return RelPath(os.path.abspath(os.path.join(conf_dir, path)))\n\n    @classmethod\n    def _load_paths(cls, conf, filename):\n        conf_dir = os.path.abspath(os.path.dirname(filename))\n        resolve = partial(cls._resolve, conf_dir)\n\n        return Config._map_dirs(conf, resolve)\n\n    @staticmethod\n    def _to_relpath(conf_dir, path):\n        from dvc.fs import localfs\n        from dvc.utils import relpath\n\n        from .config_schema import ExpPath, RelPath\n\n        if re.match(r\"\\w+://\", path):\n            return path\n\n        if isinstance(path, ExpPath):\n            return path.def_path\n\n        if os.path.expanduser(path) != path:\n            return localfs.as_posix(path)\n\n        if os.name == \"nt\" and posixpath.isabs(path) and ntpath.sep not in path:\n            return path\n\n        if isinstance(path, RelPath) or not os.path.isabs(path):\n            path = relpath(path, conf_dir)\n            return localfs.as_posix(path)\n\n        return path\n\n    @staticmethod\n    def _save_paths(conf, filename):\n        conf_dir = os.path.dirname(filename)\n        rel = partial(Config._to_relpath, conf_dir)\n\n        return Config._map_dirs(conf, rel)\n\n    @staticmethod\n    def _map_dirs(conf, func):\n        from voluptuous import ALLOW_EXTRA, Schema\n\n        dirs_schema = {\n            \"cache\": {\"dir\": func},\n            \"remote\": {\n                str: {\n                    \"url\": func,\n                    \"gdrive_user_credentials_file\": func,\n                    \"gdrive_service_account_json_file_path\": func,\n                    \"credentialpath\": func,\n                    \"configpath\": func,\n                    \"keyfile\": func,\n                    
\"cert_path\": func,\n                    \"key_path\": func,\n                }\n            },\n            \"machine\": {\n                str: {\n                    \"startup_script\": func,\n                    \"setup_script\": func,\n                }\n            },\n        }\n        return Schema(dirs_schema, extra=ALLOW_EXTRA)(conf)\n\n    def load_config_to_level(self, level=None):\n        merged_conf: dict = {}\n        for merge_level in self.LEVELS:\n            if merge_level == level:\n                break\n            if merge_level in self.files:\n                merge(merged_conf, self.load_one(merge_level))\n        return merged_conf\n\n    def read(self, level=None):\n        # NOTE: we read from a merged config by default, same as git config\n        if level is None:\n            return self.load_config_to_level()\n        return self.load_one(level)\n\n    @contextmanager\n    def edit(self, level=None, validate=True):\n        # NOTE: we write to repo config by default, same as git config\n        level = level or \"repo\"\n        if self.dvc_dir is None and level in self.REPO_LEVELS:\n            raise ConfigError(\"Not inside a DVC repo\")\n\n        conf = self.load_one(level)\n        yield conf\n\n        conf = self._save_paths(conf, self.files[level])\n\n        merged_conf = self.load_config_to_level(level)\n        merge(merged_conf, conf)\n\n        if validate:\n            self.validate(merged_conf)\n\n        self._save_config(level, conf)\n        self.load(validate=validate)\n\n    @staticmethod\n    def validate(data):\n        from voluptuous import Invalid\n\n        try:\n            return get_compiled_schema()(data)\n        except Invalid as exc:\n            raise ConfigError(str(exc)) from None\n\n\ndef _parse_named(conf):\n    result: dict[str, dict] = {\"remote\": {}, \"machine\": {}, \"db\": {}}\n\n    for section, val in conf.items():\n        match = re_find(r'^\\s*(remote|machine|db)\\s*\"(.*)\"\\s*$', 
section)\n        if match:\n            key, name = match\n            result[key][name] = val\n        else:\n            result[section] = val\n\n    return result\n\n\ndef _pack_named(conf):\n    # Drop empty sections\n    result = compact(conf)\n\n    # Transform remote.name -> 'remote \"name\"'\n    for key in (\"remote\", \"machine\", \"db\"):\n        for name, val in conf[key].items():\n            result[f'{key} \"{name}\"'] = val\n        result.pop(key, None)\n\n    return result\n\n\ndef merge(into, update):\n    \"\"\"Merges second dict into first recursively\"\"\"\n    for key, val in update.items():\n        if isinstance(into.get(key), dict) and isinstance(val, dict):\n            merge(into[key], val)\n        else:\n            into[key] = val\n\n\ndef _lower_keys(data):\n    return {\n        k.lower(): _lower_keys(v) if isinstance(v, dict) else v for k, v in data.items()\n    }\n"
  },
  {
    "path": "dvc/config_schema.py",
    "content": "import os\nfrom typing import TYPE_CHECKING\nfrom urllib.parse import urlparse\n\nfrom funcy import once, walk_values\nfrom voluptuous import (\n    REMOVE_EXTRA,\n    All,\n    Any,\n    Coerce,\n    Exclusive,\n    Invalid,\n    Lower,\n    Marker,\n    Optional,\n    Range,\n    Schema,\n)\n\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from collections.abc import Iterator\n\nlogger = logger.getChild(__name__)\n\nBool = All(\n    Lower,\n    Any(\"true\", \"false\"),\n    lambda v: v == \"true\",\n    msg=\"expected true or false\",\n)\n\n\ndef supported_cache_type(types):\n    \"\"\"Checks if link type config option consists only of valid values.\n\n    Args:\n        types (list/string): type(s) of links that dvc should try out.\n    \"\"\"\n    if types is None:\n        return None\n    if isinstance(types, str):\n        types = [typ.strip() for typ in types.split(\",\")]\n\n    unsupported = set(types) - {\"reflink\", \"hardlink\", \"symlink\", \"copy\"}\n    if unsupported:\n        raise Invalid(\"Unsupported cache type(s): {}\".format(\", \".join(unsupported)))\n\n    return types\n\n\ndef Choices(*choices):  # noqa: N802\n    \"\"\"Checks that value belongs to the specified set of values\n\n    Args:\n        *choices: pass allowed values as arguments, or pass a list or\n            tuple as a single argument\n    \"\"\"\n    return Any(*choices, msg=\"expected one of {}\".format(\", \".join(choices)))\n\n\ndef ByUrl(mapping):  # noqa: N802\n    schemas = walk_values(Schema, mapping)\n\n    def validate(data):\n        if \"url\" not in data:\n            raise Invalid(\"expected 'url'\")\n\n        parsed = urlparse(data[\"url\"])\n        # Windows absolute paths should really have scheme == \"\" (local)\n        if os.name == \"nt\" and len(parsed.scheme) == 1 and not parsed.netloc:\n            return schemas[\"\"](data)\n        if not parsed.netloc:\n            return schemas[\"\"](data)\n        if parsed.scheme not in 
schemas:\n            raise Invalid(f\"Unsupported URL type {parsed.scheme}://\")\n\n        return schemas[parsed.scheme](data)\n\n    return validate\n\n\nclass ExpPath(str):\n    __slots__ = (\"def_path\",)\n\n    def_path: str\n\n    def __new__(cls, string, def_path):\n        ret = super().__new__(cls, string)\n        ret.def_path = def_path\n        return ret\n\n\nclass RelPath(str):\n    __slots__ = ()\n\n\nclass FeatureSchema(Schema):\n    def __init__(self, schema, required=False):\n        super().__init__(schema, required=required, extra=REMOVE_EXTRA)\n\n    @staticmethod\n    @once\n    def _log_deprecated(keys):\n        # only run this once per session\n        message = \"%s config option%s unsupported\"\n        paths = \", \".join(f\"'feature.{key}'\" for key in keys)\n        pluralize = \" is\" if len(keys) == 1 else \"s are\"\n        logger.warning(message, paths, pluralize)\n\n    def __call__(self, data):\n        ret = super().__call__(data)\n        extra_keys = data.keys() - ret.keys()\n        if extra_keys:\n            self._log_deprecated(sorted(extra_keys))\n        return ret\n\n\nDEPRECATED = \"==DEPRECATED==\"\n\nREMOTE_COMMON = {\n    \"url\": str,\n    \"checksum_jobs\": All(Coerce(int), Range(1)),\n    \"jobs\": All(Coerce(int), Range(1)),\n    Optional(\"worktree\"): Bool,\n    Optional(\"no_traverse\", description=DEPRECATED): Bool,  # obsoleted\n    Optional(\"version_aware\"): Bool,\n}\nLOCAL_COMMON = {\n    \"type\": supported_cache_type,\n    Optional(\"protected\", default=False, description=DEPRECATED): Bool,  # obsoleted\n    \"shared\": All(Lower, Choices(\"group\")),\n    Optional(\"slow_link_warning\", default=True): Bool,\n    Optional(\"verify\", default=False): Bool,\n}\nHTTP_COMMON = {\n    \"auth\": All(Lower, Choices(\"basic\", \"digest\", \"custom\")),\n    \"custom_auth_header\": str,\n    \"user\": str,\n    \"password\": str,\n    \"ask_password\": Bool,\n    \"ssl_verify\": Any(Bool, str),\n    
\"method\": str,\n    \"connect_timeout\": All(Coerce(float), Range(0, min_included=True)),\n    \"read_timeout\": All(Coerce(float), Range(0, min_included=True)),\n    Optional(\"verify\", default=False): Bool,\n}\nWEBDAV_COMMON = {\n    \"user\": str,\n    \"password\": str,\n    \"ask_password\": Bool,\n    \"token\": str,\n    \"bearer_token_command\": str,\n    \"custom_auth_header\": str,\n    \"cert_path\": str,\n    \"key_path\": str,\n    \"timeout\": Coerce(int),\n    \"ssl_verify\": Any(Bool, str),\n    Optional(\"verify\", default=False): Bool,\n}\n\nREMOTE_SCHEMAS = {\n    \"\": LOCAL_COMMON | REMOTE_COMMON,\n    \"s3\": {\n        \"region\": str,\n        \"profile\": str,\n        \"credentialpath\": str,\n        \"configpath\": str,\n        \"endpointurl\": str,\n        \"access_key_id\": str,\n        \"secret_access_key\": str,\n        \"session_token\": str,\n        Optional(\n            \"listobjects\", default=False, description=DEPRECATED\n        ): Bool,  # obsoleted\n        Optional(\"use_ssl\", default=True): Bool,\n        Optional(\"allow_anonymous_login\", default=False): Bool,\n        \"ssl_verify\": Any(Bool, str),\n        \"sse\": str,\n        \"sse_kms_key_id\": str,\n        \"sse_customer_algorithm\": str,\n        \"sse_customer_key\": str,\n        \"acl\": str,\n        \"grant_read\": str,\n        \"grant_read_acp\": str,\n        \"grant_write_acp\": str,\n        \"grant_full_control\": str,\n        \"cache_regions\": bool,\n        \"read_timeout\": Coerce(int),\n        \"connect_timeout\": Coerce(int),\n        Optional(\"verify\", default=False): Bool,\n        **REMOTE_COMMON,\n    },\n    \"gs\": {\n        \"projectname\": str,\n        \"credentialpath\": str,\n        \"endpointurl\": str,\n        Optional(\"verify\", default=False): Bool,\n        Optional(\"allow_anonymous_login\", default=False): Bool,\n        **REMOTE_COMMON,\n    },\n    \"ssh\": {\n        \"type\": supported_cache_type,\n       
 \"port\": Coerce(int),\n        \"user\": str,\n        \"password\": str,\n        \"ask_password\": Bool,\n        \"passphrase\": str,\n        \"ask_passphrase\": Bool,\n        \"keyfile\": str,\n        \"timeout\": Coerce(int),\n        \"gss_auth\": Bool,\n        \"allow_agent\": Bool,\n        \"max_sessions\": Coerce(int),\n        Optional(\"verify\", default=False): Bool,\n        **REMOTE_COMMON,\n    },\n    \"hdfs\": {\n        \"user\": str,\n        \"kerb_ticket\": str,\n        \"replication\": int,\n        **REMOTE_COMMON,\n    },\n    \"webhdfs\": {\n        \"kerberos\": Bool,\n        \"kerberos_principal\": str,\n        \"proxy_to\": str,\n        \"ssl_verify\": Any(Bool, str),\n        \"token\": str,\n        \"use_https\": Bool,\n        \"user\": str,\n        \"password\": str,\n        \"data_proxy_target\": str,\n        Optional(\"verify\", default=False): Bool,\n        **REMOTE_COMMON,\n    },\n    \"azure\": {\n        \"connection_string\": str,\n        \"sas_token\": str,\n        \"account_name\": str,\n        \"account_key\": str,\n        \"tenant_id\": str,\n        \"client_id\": str,\n        \"client_secret\": str,\n        \"allow_anonymous_login\": Bool,\n        \"exclude_environment_credential\": Bool,\n        \"exclude_visual_studio_code_credential\": Bool,\n        \"exclude_shared_token_cache_credential\": Bool,\n        \"exclude_managed_identity_credential\": Bool,\n        Optional(\"verify\", default=False): Bool,\n        \"timeout\": Coerce(int),\n        \"read_timeout\": Coerce(int),\n        \"connection_timeout\": Coerce(int),\n        **REMOTE_COMMON,\n    },\n    \"oss\": {\n        \"oss_key_id\": str,\n        \"oss_key_secret\": str,\n        \"oss_endpoint\": str,\n        Optional(\"verify\", default=True): Bool,\n        **REMOTE_COMMON,\n    },\n    \"gdrive\": {\n        \"profile\": str,\n        \"gdrive_use_service_account\": Bool,\n        \"gdrive_client_id\": str,\n        
\"gdrive_client_secret\": str,\n        \"gdrive_user_credentials_file\": str,\n        \"gdrive_service_account_user_email\": str,\n        \"gdrive_service_account_json_file_path\": str,\n        Optional(\"gdrive_trash_only\", default=False): Bool,\n        Optional(\"gdrive_acknowledge_abuse\", default=False): Bool,\n        Optional(\"verify\", default=True): Bool,\n        **REMOTE_COMMON,\n    },\n    \"http\": HTTP_COMMON | REMOTE_COMMON,\n    \"https\": HTTP_COMMON | REMOTE_COMMON,\n    \"webdav\": WEBDAV_COMMON | REMOTE_COMMON,\n    \"webdavs\": WEBDAV_COMMON | REMOTE_COMMON,\n    \"remote\": {str: object},  # Any of the above options are valid\n}\n\nSCHEMA = {\n    \"core\": {\n        \"remote\": Lower,\n        \"checksum_jobs\": All(Coerce(int), Range(1)),\n        Optional(\"interactive\", default=False): Bool,\n        Optional(\"analytics\", default=True): Bool,\n        Optional(\"hardlink_lock\", default=False): Bool,\n        Optional(\"no_scm\", default=False): Bool,\n        Optional(\"autostage\", default=False): Bool,\n        Optional(\"experiments\", description=DEPRECATED): Bool,  # obsoleted\n        Optional(\"check_update\", default=True): Bool,\n        \"site_cache_dir\": str,\n        \"machine\": Lower,\n    },\n    \"cache\": {\n        Marker(\"local\", description=DEPRECATED): str,  # obsoleted\n        Marker(\"s3\", description=DEPRECATED): str,  # obsoleted\n        Marker(\"gs\", description=DEPRECATED): str,  # obsoleted\n        Marker(\"hdfs\", description=DEPRECATED): str,  # obsoleted\n        Marker(\"webhdfs\", description=DEPRECATED): str,  # obsoleted\n        Marker(\"ssh\", description=DEPRECATED): str,  # obsoleted\n        Marker(\"azure\", description=DEPRECATED): str,  # obsoleted\n        # This is for default local cache\n        \"dir\": str,\n        **LOCAL_COMMON,\n    },\n    \"remote\": {\n        str: ByUrl(REMOTE_SCHEMAS),\n    },\n    \"state\": {\n        Marker(\"dir\", description=DEPRECATED): 
str,  # obsoleted\n        Marker(\"row_limit\", description=DEPRECATED): All(\n            Coerce(int), Range(1)\n        ),  # obsoleted\n        Marker(\"row_cleanup_quota\", description=DEPRECATED): All(\n            Coerce(int), Range(0, 100)\n        ),  # obsoleted\n    },\n    \"index\": {\n        Marker(\"dir\", description=DEPRECATED): str,  # obsoleted\n    },\n    \"machine\": {\n        str: {\n            \"cloud\": All(Lower, Choices(\"aws\", \"azure\")),\n            \"region\": All(Lower, Choices(\"us-west\", \"us-east\", \"eu-west\", \"eu-north\")),\n            \"image\": str,\n            \"spot\": Bool,\n            \"spot_price\": Coerce(float),\n            \"instance_hdd_size\": Coerce(int),\n            \"instance_type\": Lower,\n            \"instance_gpu\": Lower,\n            \"ssh_private\": str,\n            \"startup_script\": str,\n            \"setup_script\": str,\n        },\n    },\n    # section for experimental features\n    # only specified keys are validated, others get logged and then ignored/removed\n    \"feature\": FeatureSchema(\n        {\n            Optional(\"machine\", default=False): Bool,\n        },\n    ),\n    \"plots\": {\n        \"html_template\": str,\n        Optional(\"auto_open\", default=False): Bool,\n        \"out_dir\": str,\n    },\n    \"exp\": {\n        Marker(\"code\", description=DEPRECATED): str,\n        Marker(\"data\", description=DEPRECATED): str,\n        Marker(\"models\", description=DEPRECATED): str,\n        Marker(\"metrics\", description=DEPRECATED): str,\n        Marker(\"params\", description=DEPRECATED): str,\n        Marker(\"plots\", description=DEPRECATED): str,\n        Marker(\"live\", description=DEPRECATED): str,\n        \"auto_push\": Bool,\n        \"git_remote\": str,\n    },\n    \"parsing\": {\n        \"bool\": All(Lower, Choices(\"store_true\", \"boolean_optional\")),\n        \"list\": All(Lower, Choices(\"nargs\", \"append\")),\n    },\n    \"hydra\": {\n        
Optional(\"enabled\", default=False): Bool,\n        Exclusive(\"config_dir\", \"config_source\"): str,\n        Exclusive(\"config_module\", \"config_source\"): str,\n        \"config_name\": str,\n        \"plugins_path\": str,\n    },\n    \"studio\": {\n        \"token\": str,\n        \"url\": str,\n        \"repo_url\": str,\n        Optional(\"offline\", default=False): Bool,\n    },\n    \"db\": {\n        str: {\n            \"url\": str,\n            \"username\": str,\n            \"password\": str,\n        },\n    },\n}\n\n\ndef config_vars_for_completion(d: dict = SCHEMA, path: str = \"\") -> \"Iterator[str]\":\n    for k, v in d.items():\n        if k in (\"machine\", \"feature\"):\n            continue\n        if isinstance(k, Marker):\n            if k.description == DEPRECATED:\n                continue\n            k = k.schema\n        if not isinstance(k, str):\n            continue\n\n        keypath = path + k\n        if isinstance(v, dict):\n            yield from config_vars_for_completion(v, keypath + \".\")\n        else:\n            yield keypath\n"
  },
  {
    "path": "dvc/daemon.py",
    "content": "\"\"\"Launch `dvc daemon` command in a separate detached process.\"\"\"\n\nimport inspect\nimport logging\nimport os\nimport subprocess\nimport sys\nfrom collections.abc import Mapping, Sequence\nfrom contextlib import nullcontext\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from contextlib import AbstractContextManager\n\nfrom dvc.env import DVC_DAEMON, DVC_DAEMON_LOGFILE\nfrom dvc.utils import fix_env, is_binary\nfrom dvc.utils.collections import ensure_list\n\nlogger = logger.getChild(__name__)\n\n\ndef _suppress_resource_warning(popen: subprocess.Popen) -> None:\n    \"\"\"Sets the returncode to avoid ResourceWarning when popen is garbage collected.\"\"\"\n    # only use for daemon processes.\n    # See https://bugs.python.org/issue38890.\n    popen.returncode = 0\n\n\ndef _win_detached_subprocess(args: Sequence[str], **kwargs) -> int:\n    assert os.name == \"nt\"\n\n    from subprocess import (  # type: ignore[attr-defined]\n        CREATE_NEW_PROCESS_GROUP,  # ty: ignore[unresolved-import]\n        CREATE_NO_WINDOW,  # ty: ignore[unresolved-import]\n        STARTF_USESHOWWINDOW,  # ty: ignore[unresolved-import]\n        STARTUPINFO,  # ty: ignore[unresolved-import]\n    )\n\n    # https://stackoverflow.com/a/7006424\n    # https://bugs.python.org/issue41619\n    creationflags = CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW\n\n    startupinfo = STARTUPINFO()\n    startupinfo.dwFlags |= STARTF_USESHOWWINDOW\n    popen = subprocess.Popen(  # noqa: S603\n        args,\n        close_fds=True,\n        shell=False,\n        startupinfo=startupinfo,\n        creationflags=creationflags,\n        **kwargs,\n    )\n    _suppress_resource_warning(popen)\n    return popen.pid\n\n\ndef _get_dvc_args() -> list[str]:\n    args = [sys.executable]\n    if not is_binary():\n        root_dir = os.path.abspath(os.path.dirname(__file__))\n        main_entrypoint = os.path.join(root_dir, 
\"__main__.py\")\n        args.append(main_entrypoint)\n    return args\n\n\ndef _fork_process() -> int:\n    assert os.name == \"posix\"\n\n    # NOTE: using os._exit instead of sys.exit, because dvc built\n    # with PyInstaller has trouble with SystemExit exception and throws\n    # errors such as \"[26338] Failed to execute script __main__\"\n    try:\n        pid = os.fork()  # type: ignore[attr-defined]\n        if pid > 0:\n            return pid\n    except OSError:\n        logger.exception(\"failed at first fork\")\n        os._exit(1)\n\n    os.setsid()  # type: ignore[attr-defined]\n\n    try:\n        pid = os.fork()  # type: ignore[attr-defined]\n        if pid > 0:\n            os._exit(0)\n    except OSError:\n        logger.exception(\"failed at second fork\")\n        os._exit(1)\n\n    # disconnect from the terminal\n    fd = os.open(os.devnull, os.O_RDWR)\n    for fd2 in range(3):\n        os.dup2(fd, fd2)\n    os.close(fd)\n    return pid\n\n\ndef _posix_detached_subprocess(args: Sequence[str], **kwargs) -> int:\n    # double fork and execute a subprocess so that there are no zombies\n    read_end, write_end = os.pipe()\n    pid = _fork_process()\n    if pid > 0:  # in parent\n        os.close(write_end)\n        pid_str = os.read(read_end, 32).decode(\"utf8\")\n        os.close(read_end)\n        return int(pid_str)\n\n    proc = subprocess.Popen(args, shell=False, close_fds=True, **kwargs)  # noqa: S603\n    os.close(read_end)\n    os.write(write_end, str(proc.pid).encode(\"utf8\"))\n    os.close(write_end)\n\n    exit_code = proc.wait()\n    os._exit(exit_code)\n\n\ndef _detached_subprocess(args: Sequence[str], **kwargs) -> int:\n    \"\"\"Run in a detached subprocess.\"\"\"\n    kwargs.setdefault(\"stdin\", subprocess.DEVNULL)\n    kwargs.setdefault(\"stdout\", subprocess.DEVNULL)\n    kwargs.setdefault(\"stderr\", subprocess.DEVNULL)\n\n    if os.name == \"nt\":\n        return _win_detached_subprocess(args, **kwargs)\n    return 
_posix_detached_subprocess(args, **kwargs)\n\n\ndef _map_log_level_to_flag() -> Optional[str]:\n    flags = {logging.DEBUG: \"-v\", logging.TRACE: \"-vv\"}  # type: ignore[attr-defined]\n    return flags.get(logger.getEffectiveLevel())\n\n\ndef daemon(args: list[str]) -> None:\n    \"\"\"Launch a `dvc daemon` command in a detached process.\n\n    Args:\n        args (list): list of arguments to append to `dvc daemon` command.\n    \"\"\"\n    if flag := _map_log_level_to_flag():\n        args = [*args, flag]\n    daemonize([\"daemon\", *args])\n\n\ndef _spawn(\n    args: list[str],\n    executable: Optional[Union[str, list[str]]] = None,\n    env: Optional[Mapping[str, str]] = None,\n    output_file: Optional[str] = None,\n) -> int:\n    file: AbstractContextManager[Any] = nullcontext()\n    kwargs = {}\n    if output_file:\n        file = open(output_file, \"ab\")  # noqa: SIM115\n        kwargs = {\"stdout\": file, \"stderr\": file}\n\n    if executable is None:\n        executable = _get_dvc_args()\n    else:\n        executable = ensure_list(executable)\n\n    with file:\n        return _detached_subprocess(executable + args, env=env, **kwargs)\n\n\ndef daemonize(args: list[str], executable: Union[str, list[str], None] = None) -> None:\n    if os.name not in (\"posix\", \"nt\"):\n        return\n\n    if os.environ.get(DVC_DAEMON):\n        logger.debug(\"skipping launching a new daemon.\")\n        return\n\n    env = fix_env()\n    env[DVC_DAEMON] = \"1\"\n    if not is_binary():\n        file_path = os.path.abspath(inspect.stack()[0][1])\n        env[\"PYTHONPATH\"] = os.path.dirname(os.path.dirname(file_path))\n\n    logger.debug(\"Trying to spawn %r\", args)\n    pid = _spawn(args, executable, env, output_file=env.get(DVC_DAEMON_LOGFILE))\n    logger.debug(\"Spawned %r with pid %s\", args, pid)\n"
  },
  {
    "path": "dvc/dagascii.py",
    "content": "\"\"\"Draws DAG in ASCII.\"\"\"\n\nimport math\nimport os\n\nfrom grandalf.graphs import Edge, Graph, Vertex\nfrom grandalf.layouts import SugiyamaLayout\nfrom grandalf.routing import EdgeViewer, route_with_lines\n\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\nclass VertexViewer:\n    \"\"\"Class to define vertex box boundaries that will be accounted for during\n    graph building by grandalf.\n\n    Args:\n        name (str): name of the vertex.\n    \"\"\"\n\n    HEIGHT = 3  # top and bottom box edges + text\n\n    def __init__(self, name):\n        self._h = self.HEIGHT  # top and bottom box edges + text\n        self._w = len(name) + 2  # right and left bottom edges + text\n\n    @property\n    def h(self):\n        \"\"\"Height of the box.\"\"\"\n        return self._h\n\n    @property\n    def w(self):\n        \"\"\"Width of the box.\"\"\"\n        return self._w\n\n\nclass AsciiCanvas:\n    \"\"\"Class for drawing in ASCII.\n\n    Args:\n        cols (int): number of columns in the canvas. Should be > 1.\n        lines (int): number of lines in the canvas. Should be > 1.\n    \"\"\"\n\n    TIMEOUT = 10\n\n    def __init__(self, cols, lines):\n        assert cols > 1\n        assert lines > 1\n\n        self.cols = cols\n        self.lines = lines\n\n        self.canvas = [[\" \"] * cols for line in range(lines)]\n\n    def draw(self):\n        \"\"\"Draws ASCII canvas on the screen.\"\"\"\n        lines = map(\"\".join, self.canvas)\n        return os.linesep.join(lines)\n\n    def point(self, x, y, char):\n        \"\"\"Create a point on ASCII canvas.\n\n        Args:\n            x (int): x coordinate. Should be >= 0 and < number of columns in\n                the canvas.\n            y (int): y coordinate. 
Should be >= 0 an < number of lines in the\n                canvas.\n            char (str): character to place in the specified point on the\n                canvas.\n        \"\"\"\n        assert len(char) == 1\n        assert x >= 0\n        assert x < self.cols\n        assert y >= 0\n        assert y < self.lines\n\n        self.canvas[y][x] = char\n\n    def line(self, x0, y0, x1, y1, char):  # noqa: C901, PLR0912\n        \"\"\"Create a line on ASCII canvas.\n\n        Args:\n            x0 (int): x coordinate where the line should start.\n            y0 (int): y coordinate where the line should start.\n            x1 (int): x coordinate where the line should end.\n            y1 (int): y coordinate where the line should end.\n            char (str): character to draw the line with.\n        \"\"\"\n        if x0 > x1:\n            x1, x0 = x0, x1\n            y1, y0 = y0, y1\n\n        dx = x1 - x0\n        dy = y1 - y0\n\n        if dx == 0 and dy == 0:\n            self.point(x0, y0, char)\n        elif abs(dx) >= abs(dy):\n            for x in range(x0, x1 + 1):\n                if dx == 0:\n                    y = y0\n                else:\n                    y = y0 + round((x - x0) * dy / float(dx))\n                self.point(x, y, char)\n        elif y0 < y1:\n            for y in range(y0, y1 + 1):\n                if dy == 0:\n                    x = x0\n                else:\n                    x = x0 + round((y - y0) * dx / float(dy))\n                self.point(x, y, char)\n        else:\n            for y in range(y1, y0 + 1):\n                if dy == 0:\n                    x = x0\n                else:\n                    x = x1 + round((y - y1) * dx / float(dy))\n                self.point(x, y, char)\n\n    def text(self, x, y, text):\n        \"\"\"Print a text on ASCII canvas.\n\n        Args:\n            x (int): x coordinate where the text should start.\n            y (int): y coordinate where the text should start.\n            
text (str): string that should be printed.\n        \"\"\"\n        for i, char in enumerate(text):\n            self.point(x + i, y, char)\n\n    def box(self, x0, y0, width, height):\n        \"\"\"Create a box on ASCII canvas.\n\n        Args:\n            x0 (int): x coordinate of the box corner.\n            y0 (int): y coordinate of the box corner.\n            width (int): box width.\n            height (int): box height.\n        \"\"\"\n        assert width > 1\n        assert height > 1\n\n        width -= 1\n        height -= 1\n\n        for x in range(x0, x0 + width):\n            self.point(x, y0, \"-\")\n            self.point(x, y0 + height, \"-\")\n\n        for y in range(y0, y0 + height):\n            self.point(x0, y, \"|\")\n            self.point(x0 + width, y, \"|\")\n\n        self.point(x0, y0, \"+\")\n        self.point(x0 + width, y0, \"+\")\n        self.point(x0, y0 + height, \"+\")\n        self.point(x0 + width, y0 + height, \"+\")\n\n\ndef _build_sugiyama_layout(vertices, edges):\n    #\n    # Just a reminder about naming conventions:\n    # +------------X\n    # |\n    # |\n    # |\n    # |\n    # Y\n    #\n\n    vertices = {v: Vertex(f\" {v} \") for v in vertices}\n    # NOTE: reverting edges to correctly orientate the graph\n    edges = [Edge(vertices[e], vertices[s]) for s, e in edges]\n    vertices = vertices.values()\n    graph = Graph(vertices, edges)\n\n    for vertex in vertices:\n        vertex.view = VertexViewer(vertex.data)\n\n    # NOTE: determine min box length to create the best layout\n    minw = min(v.view.w for v in vertices)\n\n    for edge in edges:\n        edge.view = EdgeViewer()\n\n    sug = SugiyamaLayout(graph.C[0])\n    graph = graph.C[0]\n    roots = list(filter(lambda x: len(x.e_in()) == 0, graph.sV))\n\n    sug.init_all(roots=roots, optimize=True)\n\n    sug.yspace = VertexViewer.HEIGHT\n    sug.xspace = minw\n    sug.route_edge = route_with_lines  # ty: ignore[unresolved-attribute]\n\n    
sug.draw()\n\n    return sug\n\n\ndef draw(vertices, edges):\n    \"\"\"Build a DAG and draw it in ASCII.\n\n    Args:\n        vertices (list): list of graph vertices.\n        edges (list): list of graph edges.\n\n    Returns:\n        str: ASCII representation\n\n    Example:\n        >>> from dvc.dagascii import draw\n        >>> vertices = [1, 2, 3, 4]\n        >>> edges = [(1, 2), (2, 3), (2, 4), (1, 4)]\n        >>> print(draw(vertices, edges))\n        +---+     +---+\n        | 3 |     | 4 |\n        +---+    *+---+\n          *    **   *\n          *  **     *\n          * *       *\n        +---+       *\n        | 2 |      *\n        +---+     *\n             *    *\n              *  *\n               **\n             +---+\n             | 1 |\n             +---+\n    \"\"\"\n\n    # NOTE: coordinates might me negative, so we need to shift\n    # everything to the positive plane before we actually draw it.\n    Xs = []  # noqa: N806\n    Ys = []  # noqa: N806\n\n    sug = _build_sugiyama_layout(vertices, edges)\n\n    for vertex in sug.g.sV:\n        # NOTE: moving boxes w/2 to the left\n        Xs.append(vertex.view.xy[0] - vertex.view.w / 2.0)\n        Xs.append(vertex.view.xy[0] + vertex.view.w / 2.0)\n        Ys.append(vertex.view.xy[1])\n        Ys.append(vertex.view.xy[1] + vertex.view.h)\n\n    for edge in sug.g.sE:\n        for x, y in edge.view._pts:\n            Xs.append(x)\n            Ys.append(y)\n\n    minx = min(Xs)\n    miny = min(Ys)\n    maxx = max(Xs)\n    maxy = max(Ys)\n\n    canvas_cols = math.ceil(math.ceil(maxx) - math.floor(minx)) + 1\n    canvas_lines = round(maxy - miny)\n\n    canvas = AsciiCanvas(canvas_cols, canvas_lines)\n\n    # NOTE: first draw edges so that node boxes could overwrite them\n    for edge in sug.g.sE:\n        assert len(edge.view._pts) > 1\n        for index in range(1, len(edge.view._pts)):\n            start = edge.view._pts[index - 1]\n            end = edge.view._pts[index]\n\n            start_x = 
round(start[0] - minx)\n            start_y = round(start[1] - miny)\n            end_x = round(end[0] - minx)\n            end_y = round(end[1] - miny)\n\n            assert start_x >= 0\n            assert start_y >= 0\n            assert end_x >= 0\n            assert end_y >= 0\n\n            canvas.line(start_x, start_y, end_x, end_y, \"*\")\n\n    for vertex in sug.g.sV:\n        # NOTE: moving boxes w/2 to the left\n        x = vertex.view.xy[0] - vertex.view.w / 2.0\n        y = vertex.view.xy[1]\n\n        canvas.box(\n            round(x - minx),\n            round(y - miny),\n            vertex.view.w,\n            vertex.view.h,\n        )\n\n        canvas.text(round(x - minx) + 1, round(y - miny) + 1, vertex.data)\n\n    return canvas.draw()\n"
  },
  {
    "path": "dvc/data_cloud.py",
    "content": "\"\"\"Manages dvc remotes that user can use with push/pull/status commands.\"\"\"\n\nfrom collections.abc import Iterable\nfrom typing import TYPE_CHECKING, Optional\n\nfrom dvc.config import NoRemoteError, RemoteConfigError\nfrom dvc.log import logger\nfrom dvc.utils.objects import cached_property\nfrom dvc_data.hashfile.db import get_index\nfrom dvc_data.hashfile.transfer import TransferResult\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n    from dvc_data.hashfile.db import HashFileDB\n    from dvc_data.hashfile.hash_info import HashInfo\n    from dvc_data.hashfile.status import CompareStatusResult\n\nlogger = logger.getChild(__name__)\n\n\nclass Remote:\n    def __init__(self, name: str, path: str, fs: \"FileSystem\", *, index=None, **config):\n        self.path = path\n        self.fs = fs\n        self.name = name\n        self.index = index\n\n        self.worktree: bool = config.pop(\"worktree\", False)\n        self.config = config\n\n    @cached_property\n    def odb(self) -> \"HashFileDB\":\n        from dvc.cachemgr import CacheManager\n        from dvc_data.hashfile.db import get_odb\n        from dvc_data.hashfile.hash import DEFAULT_ALGORITHM\n\n        path = self.path\n        if self.worktree:\n            path = self.fs.join(path, \".dvc\", CacheManager.FILES_DIR, DEFAULT_ALGORITHM)\n        else:\n            path = self.fs.join(path, CacheManager.FILES_DIR, DEFAULT_ALGORITHM)\n        return get_odb(self.fs, path, hash_name=DEFAULT_ALGORITHM, **self.config)\n\n    @cached_property\n    def legacy_odb(self) -> \"HashFileDB\":\n        from dvc_data.hashfile.db import get_odb\n\n        path = self.path\n        return get_odb(self.fs, path, hash_name=\"md5-dos2unix\", **self.config)\n\n\ndef _split_legacy_hash_infos(\n    hash_infos: Iterable[\"HashInfo\"],\n) -> tuple[set[\"HashInfo\"], set[\"HashInfo\"]]:\n    from dvc.cachemgr import LEGACY_HASH_NAMES\n\n    legacy = set()\n    default = set()\n    for hi in 
hash_infos:\n        if hi.name in LEGACY_HASH_NAMES:\n            legacy.add(hi)\n        else:\n            default.add(hi)\n    return legacy, default\n\n\nclass DataCloud:\n    \"\"\"Class that manages dvc remotes.\n\n    Args:\n        repo (dvc.repo.Repo): repo instance that belongs to the repo that\n            we are working on.\n\n    Raises:\n        config.ConfigError: thrown when config has invalid format.\n    \"\"\"\n\n    def __init__(self, repo):\n        self.repo = repo\n\n    def get_remote(\n        self,\n        name: Optional[str] = None,\n        command: str = \"<command>\",\n    ) -> \"Remote\":\n        if not name:\n            name = self.repo.config[\"core\"].get(\"remote\")\n\n        if name:\n            from dvc.fs import get_cloud_fs\n\n            cls, config, fs_path = get_cloud_fs(self.repo.config, name=name)\n\n            if config.get(\"worktree\"):\n                version_aware = config.get(\"version_aware\")\n                if version_aware is False:\n                    raise RemoteConfigError(\n                        \"worktree remotes require version_aware cloud\"\n                    )\n                if version_aware is None:\n                    config[\"version_aware\"] = True\n\n            fs = cls(**config)\n            config[\"tmp_dir\"] = self.repo.site_cache_dir\n            if self.repo.data_index is not None:\n                index = self.repo.data_index.view((\"remote\", name))\n            else:\n                index = None\n            return Remote(name, fs_path, fs, index=index, **config)\n\n        if bool(self.repo.config[\"remote\"]):\n            error_msg = (\n                f\"no remote specified in {self.repo}. 
Setup default remote with\\n\"\n                \"    dvc remote default <remote name>\\n\"\n                \"or use:\\n\"\n                f\"    dvc {command} -r <remote name>\"\n            )\n        else:\n            error_msg = (\n                f\"no remote specified in {self.repo}. Create a default remote with\\n\"\n                \"    dvc remote add -d <remote name> <remote url>\"\n            )\n\n        raise NoRemoteError(error_msg)\n\n    def get_remote_odb(\n        self,\n        name: Optional[str] = None,\n        command: str = \"<command>\",\n        hash_name: str = \"md5\",\n    ) -> \"HashFileDB\":\n        from dvc.cachemgr import LEGACY_HASH_NAMES\n\n        remote = self.get_remote(name=name, command=command)\n        if remote.fs.version_aware or remote.worktree:\n            raise RemoteConfigError(\n                f\"'{command}' is unsupported for cloud versioned remotes\"\n            )\n        if hash_name in LEGACY_HASH_NAMES:\n            return remote.legacy_odb\n        return remote.odb\n\n    def _log_missing(self, status: \"CompareStatusResult\"):\n        if status.missing:\n            missing_desc = \"\\n\".join(\n                f\"name: {hash_info.obj_name}, {hash_info}\"\n                for hash_info in status.missing\n            )\n            logger.warning(\n                (\n                    \"Some of the cache files do not exist neither locally \"\n                    \"nor on remote. 
Missing cache files:\\n%s\"\n                ),\n                missing_desc,\n            )\n\n    def transfer(\n        self,\n        src_odb: \"HashFileDB\",\n        dest_odb: \"HashFileDB\",\n        objs: Iterable[\"HashInfo\"],\n        **kwargs,\n    ) -> \"TransferResult\":\n        from dvc_data.hashfile.transfer import transfer\n\n        return transfer(src_odb, dest_odb, objs, **kwargs)\n\n    def push(\n        self,\n        objs: Iterable[\"HashInfo\"],\n        jobs: Optional[int] = None,\n        remote: Optional[str] = None,\n        odb: Optional[\"HashFileDB\"] = None,\n    ) -> \"TransferResult\":\n        \"\"\"Push data items in a cloud-agnostic way.\n\n        Args:\n            objs: objects to push to the cloud.\n            jobs: number of jobs that can be running simultaneously.\n            remote: optional name of remote to push to.\n                By default remote from core.remote config option is used.\n            odb: optional ODB to push to. Overrides remote.\n        \"\"\"\n        if odb is not None:\n            return self._push(objs, jobs=jobs, odb=odb)\n        legacy_objs, default_objs = _split_legacy_hash_infos(objs)\n        result = TransferResult(set(), set())\n        if legacy_objs:\n            odb = self.get_remote_odb(remote, \"push\", hash_name=\"md5-dos2unix\")\n            t, f = self._push(legacy_objs, jobs=jobs, odb=odb)\n            result.transferred.update(t)\n            result.failed.update(f)\n        if default_objs:\n            odb = self.get_remote_odb(remote, \"push\")\n            t, f = self._push(default_objs, jobs=jobs, odb=odb)\n            result.transferred.update(t)\n            result.failed.update(f)\n        return result\n\n    def _push(\n        self,\n        objs: Iterable[\"HashInfo\"],\n        *,\n        jobs: Optional[int] = None,\n        odb: \"HashFileDB\",\n    ) -> \"TransferResult\":\n        from dvc.fs.callbacks import TqdmCallback\n\n        if odb.hash_name == 
\"md5-dos2unix\":\n            cache = self.repo.cache.legacy\n        else:\n            cache = self.repo.cache.local\n        with TqdmCallback(\n            desc=f\"Pushing to {odb.fs.unstrip_protocol(odb.path)}\",\n            unit=\"file\",\n        ) as cb:\n            return self.transfer(\n                cache,\n                odb,\n                objs,\n                jobs=jobs,\n                dest_index=get_index(odb),\n                cache_odb=cache,\n                validate_status=self._log_missing,\n                callback=cb,\n            )\n\n    def pull(\n        self,\n        objs: Iterable[\"HashInfo\"],\n        jobs: Optional[int] = None,\n        remote: Optional[str] = None,\n        odb: Optional[\"HashFileDB\"] = None,\n    ) -> \"TransferResult\":\n        \"\"\"Pull data items in a cloud-agnostic way.\n\n        Args:\n            objs: objects to pull from the cloud.\n            jobs: number of jobs that can be running simultaneously.\n            remote: optional name of remote to pull from.\n                By default remote from core.remote config option is used.\n            odb: optional ODB to pull from. 
Overrides remote.\n        \"\"\"\n        if odb is not None:\n            return self._pull(objs, jobs=jobs, odb=odb)\n        legacy_objs, default_objs = _split_legacy_hash_infos(objs)\n        result = TransferResult(set(), set())\n        if legacy_objs:\n            odb = self.get_remote_odb(remote, \"pull\", hash_name=\"md5-dos2unix\")\n            assert odb.hash_name == \"md5-dos2unix\"\n            t, f = self._pull(legacy_objs, jobs=jobs, odb=odb)\n            result.transferred.update(t)\n            result.failed.update(f)\n        if default_objs:\n            odb = self.get_remote_odb(remote, \"pull\")\n            t, f = self._pull(default_objs, jobs=jobs, odb=odb)\n            result.transferred.update(t)\n            result.failed.update(f)\n        return result\n\n    def _pull(\n        self,\n        objs: Iterable[\"HashInfo\"],\n        *,\n        jobs: Optional[int] = None,\n        odb: \"HashFileDB\",\n    ) -> \"TransferResult\":\n        from dvc.fs.callbacks import TqdmCallback\n\n        if odb.hash_name == \"md5-dos2unix\":\n            cache = self.repo.cache.legacy\n        else:\n            cache = self.repo.cache.local\n        with TqdmCallback(\n            desc=f\"Fetching from {odb.fs.unstrip_protocol(odb.path)}\",\n            unit=\"file\",\n        ) as cb:\n            return self.transfer(\n                odb,\n                cache,\n                objs,\n                jobs=jobs,\n                src_index=get_index(odb),\n                cache_odb=cache,\n                verify=odb.verify,\n                validate_status=self._log_missing,\n                callback=cb,\n            )\n\n    def status(\n        self,\n        objs: Iterable[\"HashInfo\"],\n        jobs: Optional[int] = None,\n        remote: Optional[str] = None,\n        odb: Optional[\"HashFileDB\"] = None,\n    ):\n        \"\"\"Check status of data items in a cloud-agnostic way.\n\n        Args:\n            objs: objects to check status 
for.\n            jobs: number of jobs that can be running simultaneously.\n            remote: optional remote to compare\n                cache to. By default remote from core.remote config option\n                is used.\n            odb: optional ODB to check status from. Overrides remote.\n        \"\"\"\n        from dvc_data.hashfile.status import CompareStatusResult\n\n        if odb is not None:\n            return self._status(objs, jobs=jobs, odb=odb)\n        result = CompareStatusResult(set(), set(), set(), set())\n        legacy_objs, default_objs = _split_legacy_hash_infos(objs)\n        if legacy_objs:\n            odb = self.get_remote_odb(remote, \"status\", hash_name=\"md5-dos2unix\")\n            assert odb.hash_name == \"md5-dos2unix\"\n            o, m, n, d = self._status(legacy_objs, jobs=jobs, odb=odb)\n            result.ok.update(o)\n            result.missing.update(m)\n            result.new.update(n)\n            result.deleted.update(d)\n        if default_objs:\n            odb = self.get_remote_odb(remote, \"status\")\n            o, m, n, d = self._status(default_objs, jobs=jobs, odb=odb)\n            result.ok.update(o)\n            result.missing.update(m)\n            result.new.update(n)\n            result.deleted.update(d)\n        return result\n\n    def _status(\n        self,\n        objs: Iterable[\"HashInfo\"],\n        *,\n        jobs: Optional[int] = None,\n        odb: \"HashFileDB\",\n    ):\n        from dvc_data.hashfile.status import compare_status\n\n        if odb.hash_name == \"md5-dos2unix\":\n            cache = self.repo.cache.legacy\n        else:\n            cache = self.repo.cache.local\n        return compare_status(\n            cache,\n            odb,\n            objs,\n            jobs=jobs,\n            dest_index=get_index(odb),\n            cache_odb=cache,\n        )\n\n    def get_url_for(self, remote, checksum):\n        odb = self.get_remote_odb(remote)\n        path = 
odb.oid_to_path(checksum)\n        return odb.fs.unstrip_protocol(path)\n"
  },
  {
    "path": "dvc/database.py",
    "content": "import os\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom dataclasses import dataclass\nfrom tempfile import NamedTemporaryFile\nfrom typing import TYPE_CHECKING, Any, Callable, Optional, Union\n\nfrom sqlalchemy import create_engine  # type: ignore[import]\nfrom sqlalchemy.engine import make_url as _make_url  # type: ignore[import]\nfrom sqlalchemy.exc import NoSuchModuleError  # type: ignore[import]\n\nfrom dvc import env\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.types import StrOrBytesPath\nfrom dvc.utils import env2bool\n\nif TYPE_CHECKING:\n    from sqlalchemy.engine import URL, Connectable, Engine\n    from sqlalchemy.sql.expression import Selectable  # type: ignore[import]\n\n\nlogger = logger.getChild(__name__)\n\n\ndef noop(_):\n    pass\n\n\ndef make_url(url: Union[\"URL\", str], **kwargs: Any) -> \"URL\":\n    return _make_url(url).set(**kwargs)\n\n\ndef url_from_config(config: Union[str, \"URL\", dict[str, str]]) -> \"URL\":\n    if isinstance(config, dict):\n        return make_url(**config)\n    return make_url(config)\n\n\n@contextmanager\ndef atomic_file(file: StrOrBytesPath, mode: str = \"w+b\"):\n    head, tail = os.path.split(os.fsdecode(file))\n    with NamedTemporaryFile(mode, prefix=tail + \"-\", dir=head, delete=False) as f:\n        yield f\n    os.replace(f.name, file)\n\n\n@dataclass\nclass Serializer:\n    sql: \"Union[str, Selectable]\"\n    con: \"Union[str, Connectable]\"\n    chunksize: int = 10_000\n\n    def to_csv(self, file: StrOrBytesPath, progress=noop):\n        import pandas as pd\n\n        idfs = pd.read_sql(self.sql, self.con, chunksize=self.chunksize)\n        with atomic_file(file) as f:\n            for i, df in enumerate(idfs):\n                df.to_csv(f, header=i == 0, index=False)\n                progress(len(df))\n\n    def to_json(self, file: StrOrBytesPath, progress=noop):  # noqa: ARG002\n        import pandas as pd\n\n      
  df = pd.read_sql(self.sql, self.con)\n        with atomic_file(file) as f:\n            df.to_json(f, orient=\"records\")\n\n    def export(self, file: StrOrBytesPath, format: str = \"csv\", progress=noop):  # noqa: A002\n        if format == \"json\":\n            return self.to_json(file, progress=progress)\n        return self.to_csv(file, progress=progress)\n\n\n@dataclass\nclass Client:\n    engine: \"Engine\"\n\n    def test_connection(self, onerror: Optional[Callable[[], Any]] = None) -> None:\n        try:\n            with self.engine.connect() as conn:\n                conn.exec_driver_sql(\"select 1\")\n        except Exception as exc:\n            if callable(onerror):\n                onerror()\n            logger.exception(  # noqa: LOG007\n                \"Could not connect to the database. \"\n                \"Check your database credentials and try again.\",\n                exc_info=False,\n            )\n            raise DvcException(\"The database returned the following error\") from exc\n\n    def export(\n        self,\n        sql: \"Union[str, Selectable]\",\n        file: StrOrBytesPath,\n        format: str = \"csv\",  # noqa: A002\n        progress=noop,\n    ) -> None:\n        con = self.engine.connect()\n        if format == \"csv\":\n            con = con.execution_options(stream_results=True)  # use server-side cursors\n\n        with con:\n            serializer = Serializer(sql, con)\n            return serializer.export(file, format=format, progress=progress)\n\n\n@contextmanager\ndef handle_error(url: \"URL\"):\n    try:\n        yield\n    except (ModuleNotFoundError, NoSuchModuleError) as e:\n        # TODO: write installation instructions\n        driver = url.drivername\n        raise DvcException(f\"Could not load database driver for {driver!r}\") from e\n\n\n@contextmanager\ndef client(\n    url_or_config: Union[str, \"URL\", dict[str, str]], **engine_kwargs: Any\n) -> Iterator[Client]:\n    url = 
url_from_config(url_or_config)\n    echo = env2bool(env.DVC_SQLALCHEMY_ECHO, False)\n    engine_kwargs.setdefault(\"echo\", echo)\n\n    with handle_error(url):\n        engine = create_engine(url, **engine_kwargs)\n\n    try:\n        yield Client(engine)\n    finally:\n        engine.dispose()\n"
  },
  {
    "path": "dvc/dependency/__init__.py",
    "content": "from collections import defaultdict\nfrom collections.abc import Mapping\nfrom typing import Any\n\nfrom dvc.output import ARTIFACT_SCHEMA, DIR_FILES_SCHEMA, Output\n\nfrom .base import Dependency\nfrom .dataset import DatasetDependency\nfrom .db import DbDependency\nfrom .param import ParamsDependency\nfrom .repo import RepoDependency\n\n# NOTE: schema for dependencies is basically the same as for outputs, but\n# without output-specific entries like 'cache' (whether or not output is\n# cached, see -o and -O flags for `dvc run`) and 'metric' (whether or not\n# output is a metrics file and how to parse it, see `-M` flag for `dvc run`).\nSCHEMA: Mapping[str, Any] = {\n    **ARTIFACT_SCHEMA,\n    **RepoDependency.REPO_SCHEMA,\n    **DbDependency.DB_SCHEMA,\n    Output.PARAM_FILES: [DIR_FILES_SCHEMA],\n    Output.PARAM_FS_CONFIG: dict,\n}\n\n\ndef _get(stage, p, info, **kwargs):\n    d = info or {}\n    params = d.pop(ParamsDependency.PARAM_PARAMS, None)\n    repo = d.pop(RepoDependency.PARAM_REPO, None)\n\n    if params:\n        return ParamsDependency(stage, p, params)\n    if DbDependency.PARAM_DB in d:\n        return DbDependency(stage, d)\n\n    assert p\n    if DatasetDependency.is_dataset(p):\n        return DatasetDependency(stage, p, info)\n    if repo:\n        return RepoDependency(repo, stage, p, info)\n    return Dependency(stage, p, info, **kwargs)\n\n\ndef loadd_from(stage, d_list) -> list[Dependency]:\n    ret = []\n    for d in d_list:\n        p = d.pop(Output.PARAM_PATH, None)\n        files = d.pop(Output.PARAM_FILES, None)\n        hash_name = d.pop(Output.PARAM_HASH, None)\n        fs_config = d.pop(Output.PARAM_FS_CONFIG, None)\n        ret.append(\n            _get(stage, p, d, files=files, hash_name=hash_name, fs_config=fs_config)\n        )\n    return ret\n\n\ndef loads_from(stage, s_list, erepo=None, fs_config=None, db=None):\n    assert isinstance(s_list, list)\n    info = {RepoDependency.PARAM_REPO: erepo} if erepo else 
{}\n    if db:\n        info.update({\"db\": db})\n    return [_get(stage, s, info.copy(), fs_config=fs_config) for s in s_list]\n\n\ndef _merge_params(s_list) -> dict[str, list[str]]:\n    d = defaultdict(list)\n    default_file = ParamsDependency.DEFAULT_PARAMS_FILE\n\n    # figure out completely tracked params file, and ignore specific keys\n    wholly_tracked: set[str] = set()\n    for key in s_list:\n        if not isinstance(key, dict):\n            continue\n        wholly_tracked.update(k for k, params in key.items() if not params)\n\n    for key in s_list:\n        if isinstance(key, str):\n            if default_file not in wholly_tracked:\n                d[default_file].append(key)\n            continue\n\n        if not isinstance(key, dict):\n            msg = \"Only list of str/dict is supported. Got: \"\n            msg += f\"'{type(key).__name__}'.\"\n            raise ValueError(msg)  # noqa: TRY004\n\n        for k, params in key.items():\n            if k in wholly_tracked:\n                d[k] = []\n                continue\n            if not isinstance(params, list):\n                msg = \"Expected list of params for custom params file \"\n                msg += f\"'{k}', got '{type(params).__name__}'.\"\n                raise ValueError(msg)  # noqa: TRY004\n            d[k].extend(params)\n    return d\n\n\ndef loads_params(stage, s_list):\n    d = _merge_params(s_list)\n    return [ParamsDependency(stage, path, params) for path, params in d.items()]\n"
  },
  {
    "path": "dvc/dependency/base.py",
    "content": "from dvc.exceptions import DvcException\nfrom dvc.fs import download as fs_download\nfrom dvc.output import Output\n\n\nclass DependencyDoesNotExistError(DvcException):\n    def __init__(self, path):\n        msg = f\"dependency '{path}' does not exist\"\n        super().__init__(msg)\n\n\nclass DependencyIsNotFileOrDirError(DvcException):\n    def __init__(self, path):\n        msg = f\"dependency '{path}' is not a file or directory\"\n        super().__init__(msg)\n\n\nclass DependencyIsStageFileError(DvcException):\n    def __init__(self, path):\n        super().__init__(f\"DVC file '{path}' cannot be a dependency.\")\n\n\nclass Dependency(Output):\n    IS_DEPENDENCY = True\n\n    DoesNotExistError: type[DvcException] = DependencyDoesNotExistError\n    IsNotFileOrDirError: type[DvcException] = DependencyIsNotFileOrDirError\n    IsStageFileError: type[DvcException] = DependencyIsStageFileError\n\n    def workspace_status(self) -> dict[str, str]:\n        if self.fs.version_aware:\n            old_fs_path = self.fs_path\n            try:\n                self.fs_path = self.fs.version_path(self.fs_path, None)\n                if self.changed_meta():\n                    return {str(self): \"update available\"}\n            finally:\n                self.fs_path = old_fs_path\n        return super().workspace_status()\n\n    def update(self, rev=None):\n        if self.fs.version_aware:\n            self.fs_path = self.fs.version_path(self.fs_path, rev)\n            self.meta = self.get_meta()\n            self.fs_path = self.fs.version_path(self.fs_path, self.meta.version_id)\n\n    def download(self, to, jobs=None):\n        return fs_download(self.fs, self.fs_path, to.fs_path, jobs=jobs)\n\n    def save(self):\n        super().save()\n        if self.fs.version_aware:\n            self.fs_path = self.fs.version_path(self.fs_path, self.meta.version_id)\n\n    def dumpd(self, **kwargs):\n        if self.fs.version_aware:\n            
kwargs[\"with_files\"] = True\n        return super().dumpd(**kwargs)\n"
  },
  {
    "path": "dvc/dependency/dataset.py",
    "content": "from typing import TYPE_CHECKING, Any, ClassVar\nfrom urllib.parse import urlparse\n\nfrom funcy import compact, merge\n\nfrom dvc.exceptions import DvcException\nfrom dvc_data.hashfile.hash_info import HashInfo\n\nfrom .db import AbstractDependency\n\nif TYPE_CHECKING:\n    from dvc.stage import Stage\n\n\nclass DatasetDependency(AbstractDependency):\n    PARAM_DATASET = \"dataset\"\n    DATASET_SCHEMA: ClassVar[dict] = {PARAM_DATASET: dict}\n\n    def __init__(self, stage: \"Stage\", p, info, *args, **kwargs):\n        super().__init__(stage, info, *args, **kwargs)\n        self.def_path = p\n        self.name = urlparse(p).netloc\n        dataset_info = info.get(self.PARAM_DATASET) or {}\n        self.hash_info = HashInfo(self.PARAM_DATASET, dataset_info)  # type: ignore[arg-type]\n        self.hash_name = self.PARAM_DATASET\n\n    def __repr__(self):\n        return f\"{self.__class__.__name__}({self.def_path!r})\"\n\n    def __str__(self):\n        return self.def_path\n\n    @classmethod\n    def is_dataset(cls, p: str):\n        return urlparse(p).scheme == \"ds\"\n\n    @property\n    def protocol(self):\n        return None\n\n    def dumpd(self, **kwargs):\n        return compact({self.PARAM_PATH: self.def_path, **self.hash_info.to_dict()})\n\n    def fill_values(self, values=None):\n        \"\"\"Load params values dynamically.\"\"\"\n        self.hash_info = HashInfo(\n            self.PARAM_DATASET, merge(self.hash_info.value, values or {})\n        )\n\n    def workspace_status(self):\n        ds = self.repo.datasets[self.name]\n        if not ds.lock:\n            return {str(self): \"not in sync\"}\n\n        info: dict[str, Any] = self.hash_info.value if self.hash_info else {}  # type: ignore[assignment]\n        lock = self.repo.datasets._lock_from_info(info)\n        if not lock:\n            return {str(self): \"new\"}\n        if lock != ds.lock:\n            return {str(self): \"modified\"}\n        return {}\n\n    def 
status(self):\n        return self.workspace_status()\n\n    def get_hash(self):\n        ds = self.repo.datasets[self.name]\n        if not ds.lock:\n            if ds._invalidated:\n                raise DvcException(\n                    \"Dataset information is not in sync. \"\n                    f\"Run 'dvc ds update {self.name}' to sync.\"\n                )\n            raise DvcException(\"Dataset information missing from dvc.lock file\")\n        return HashInfo(self.PARAM_DATASET, ds.lock.to_dict())  # type: ignore[arg-type]\n\n    def save(self):\n        self.hash_info = self.get_hash()\n\n    def download(self, to, jobs=None):\n        raise NotImplementedError\n\n    def update(self, rev=None):\n        raise NotImplementedError\n"
  },
  {
    "path": "dvc/dependency/db.py",
    "content": "from collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional\n\nfrom funcy import compact, log_durations\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nfrom .base import Dependency\n\nif TYPE_CHECKING:\n    from dvc.output import Output\n    from dvc.repo import Repo\n    from dvc.stage import Stage\n\nlogger = logger.getChild(__name__)\n\n\n@contextmanager\ndef download_progress(to: \"Output\") -> Iterator[Callable[[int], Any]]:\n    from dvc.ui import ui\n    from dvc.ui._rich_progress import DbDownloadProgress\n\n    with (\n        log_durations(logger.debug, f\"Saving to {to}\"),\n        DbDownloadProgress(\n            console=ui.error_console,\n        ) as progress,\n    ):\n        task = progress.add_task(\"Saving\", total=None, output=to)\n        yield lambda n: progress.advance(task, advance=n)\n        progress.update(task, description=\"Saved\", total=0)\n\n\nclass AbstractDependency(Dependency):\n    \"\"\"Dependency without workspace/fs/fs_path\"\"\"\n\n    def __init__(self, stage: \"Stage\", info: dict[str, Any], *args, **kwargs):\n        self.repo: Repo = stage.repo\n        self.stage = stage\n        self.fs = None\n        self.fs_path = None\n        self.def_path = None  # type: ignore[assignment]\n        self.info = info or {}\n\n    @property\n    def is_in_repo(self):\n        return False\n\n\nclass DbDependency(AbstractDependency):\n    PARAM_CONNECTION = \"connection\"\n    PARAM_DB = \"db\"\n    PARAM_QUERY = \"query\"\n    PARAM_TABLE = \"table\"\n    PARAM_FILE_FORMAT = \"file_format\"\n    DB_SCHEMA: ClassVar[dict] = {\n        PARAM_DB: {\n            PARAM_QUERY: str,\n            PARAM_CONNECTION: str,\n            PARAM_FILE_FORMAT: str,\n            PARAM_TABLE: str,\n        }\n    }\n\n    def __init__(self, stage: \"Stage\", info, *args, **kwargs):\n        super().__init__(stage, info, *args, 
**kwargs)\n        self.db_info: dict[str, str] = self.info.get(self.PARAM_DB, {})\n        self.connection = self.db_info.get(self.PARAM_CONNECTION)\n\n    @property\n    def sql(self) -> Optional[str]:\n        return self.db_info.get(self.PARAM_QUERY) or self.db_info.get(self.PARAM_TABLE)\n\n    def __repr__(self):\n        return \"{}: {}\".format(\n            self.__class__.__name__, \"\".join(f\"{k}={v}\" for k, v in self.info.items())\n        )\n\n    def __str__(self):\n        from dvc.utils.humanize import truncate_text\n\n        return truncate_text(self.sql or \"\", 50)\n\n    def workspace_status(self):\n        return False  # no workspace to check\n\n    def status(self):\n        return self.workspace_status()\n\n    def save(self):\n        \"\"\"nothing to save.\"\"\"\n\n    def dumpd(self, **kwargs):\n        db_info = compact(self.db_info)\n        return {self.PARAM_DB: db_info} if db_info else {}\n\n    def update(self, rev=None):\n        \"\"\"nothing to update.\"\"\"\n\n    def download(\n        self,\n        to: \"Output\",\n        jobs: Optional[int] = None,  # noqa: ARG002\n        file_format: Optional[str] = None,\n        **kwargs: Any,\n    ) -> None:\n        from dvc.database import client\n        from dvc.ui import ui\n\n        sql = self.sql\n        if not sql:\n            raise DvcException(\"Cannot download: no query or table specified\")\n\n        db_config = self.repo.config.get(self.PARAM_DB, {})\n        config = db_config.get(self.connection)\n        if not config:\n            raise DvcException(f\"connection {self.connection} not found in config\")\n\n        file_format = file_format or self.db_info.get(self.PARAM_FILE_FORMAT, \"csv\")\n        assert file_format\n        with client(config) as db:\n            msg = \"Testing connection\"\n            with log_durations(logger.debug, msg), ui.status(msg) as status:\n                db.test_connection(onerror=status.stop)\n            with 
download_progress(to) as progress:\n                db.export(sql, to.fs_path, format=file_format, progress=progress)\n"
  },
  {
    "path": "dvc/dependency/param.py",
    "content": "import os\nimport typing\nfrom collections import defaultdict\nfrom typing import TYPE_CHECKING, Any, Optional\n\nimport dpath\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.utils.serialize import ParseError, load_path\nfrom dvc_data.hashfile.hash_info import HashInfo\n\nfrom .base import Dependency\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n\nlogger = logger.getChild(__name__)\n\n\nclass MissingParamsError(DvcException):\n    pass\n\n\nclass MissingParamsFile(DvcException):\n    pass\n\n\nclass ParamsIsADirectoryError(DvcException):\n    pass\n\n\nclass BadParamFileError(DvcException):\n    pass\n\n\ndef read_param_file(\n    fs: \"FileSystem\",\n    path: str,\n    key_paths: Optional[list[str]] = None,\n    flatten: bool = False,\n    **load_kwargs,\n) -> Any:\n    config = load_path(path, fs, **load_kwargs)\n    if not key_paths:\n        return config\n\n    ret = {}\n    if flatten:\n        for key_path in key_paths:\n            try:\n                ret[key_path] = dpath.get(config, key_path, separator=\".\")\n            except KeyError:\n                continue\n        return ret\n\n    from copy import deepcopy\n\n    from dpath import merge\n    from funcy import distinct\n\n    for key_path in distinct(key_paths):\n        merge(\n            ret,\n            deepcopy(dpath.search(config, key_path, separator=\".\")),\n            separator=\".\",\n        )\n    return ret\n\n\nclass ParamsDependency(Dependency):\n    PARAM_PARAMS = \"params\"\n    DEFAULT_PARAMS_FILE = \"params.yaml\"\n\n    def __init__(self, stage, path, params=None, repo=None):\n        self.params = list(params) if params else []\n        hash_info = HashInfo()\n        if isinstance(params, dict):\n            hash_info = HashInfo(self.PARAM_PARAMS, params)  # type: ignore[arg-type]\n        repo = repo or stage.repo\n        path = path or os.path.join(repo.root_dir, self.DEFAULT_PARAMS_FILE)\n        
super().__init__(stage, path, repo=repo)\n        self.hash_name = self.PARAM_PARAMS\n        self.hash_info = hash_info\n\n    def dumpd(self, **kwargs):\n        ret = super().dumpd()\n        if not self.hash_info:\n            ret[self.PARAM_PARAMS] = self.params or {}\n        return ret\n\n    def fill_values(self, values=None):\n        \"\"\"Load params values dynamically.\"\"\"\n        if values is None:\n            return\n\n        info = {}\n        if not self.params:\n            info.update(values)\n        for param in self.params:\n            if param in values:\n                info[param] = values[param]\n        self.hash_info = HashInfo(self.PARAM_PARAMS, info)  # type: ignore[arg-type]\n\n    def read_params(\n        self, flatten: bool = True, **kwargs: typing.Any\n    ) -> dict[str, typing.Any]:\n        try:\n            self.validate_filepath()\n        except MissingParamsFile:\n            return {}\n\n        assert self.repo\n        try:\n            return read_param_file(\n                self.repo.fs,\n                self.fs_path,\n                list(self.params) if self.params else None,\n                flatten=flatten,\n            )\n        except ParseError as exc:\n            raise BadParamFileError(f\"Unable to read parameters from '{self}'\") from exc\n\n    def workspace_status(self):\n        if not self.exists:\n            return {str(self): \"deleted\"}\n        if self.hash_info.value is None:\n            return {str(self): \"new\"}\n\n        from funcy import ldistinct\n\n        status: dict[str, Any] = defaultdict(dict)\n        info = self.hash_info.value if self.hash_info else {}\n        assert isinstance(info, dict)\n        actual = self.read_params()\n\n        # NOTE: we want to preserve the order of params as specified in the\n        # status. 
In case of tracking the whole file, the order is top-level\n        # keys in the file and then the keys in the `info` from `dvc.lock`\n        # (which are alphabetically sorted).\n        params = self.params or ldistinct([*actual.keys(), *info.keys()])\n        for param in params:\n            if param not in actual:\n                st = \"deleted\"\n            elif param not in info:\n                st = \"new\"\n            elif actual[param] != info[param]:\n                if (\n                    isinstance(actual[param], tuple)\n                    and list(actual[param]) == info[param]\n                ):\n                    continue\n                st = \"modified\"\n            else:\n                continue\n\n            status[str(self)][param] = st\n\n        return status\n\n    def status(self):\n        return self.workspace_status()\n\n    def validate_filepath(self):\n        if not self.exists:\n            raise MissingParamsFile(f\"Parameters file '{self}' does not exist\")\n        if self.isdir():\n            raise ParamsIsADirectoryError(\n                f\"'{self}' is a directory, expected a parameters file\"\n            )\n\n    def get_hash(self):\n        info = self.read_params()\n\n        missing_params = set(self.params) - set(info.keys())\n        if missing_params:\n            raise MissingParamsError(\n                \"Parameters '{}' are missing from '{}'.\".format(\n                    \", \".join(missing_params), self\n                )\n            )\n\n        return HashInfo(self.PARAM_PARAMS, info)  # type: ignore[arg-type]\n\n    def save(self):\n        if not self.exists:\n            raise self.DoesNotExistError(self)\n\n        if not self.isfile() and not self.isdir():\n            raise self.IsNotFileOrDirError(self)\n\n        self.ignore()\n        self.hash_info = self.get_hash()\n"
  },
  {
    "path": "dvc/dependency/repo.py",
    "content": "from copy import deepcopy\nfrom typing import TYPE_CHECKING, Any, ClassVar, Optional, Union\n\nimport voluptuous as vol\n\nfrom dvc.utils import as_posix\n\nfrom .base import Dependency\n\nif TYPE_CHECKING:\n    from dvc.fs import DVCFileSystem\n    from dvc.output import Output\n    from dvc.stage import Stage\n    from dvc_data.hashfile.hash_info import HashInfo\n\n\nclass RepoDependency(Dependency):\n    PARAM_REPO = \"repo\"\n    PARAM_URL = \"url\"\n    PARAM_REV = \"rev\"\n    PARAM_REV_LOCK = \"rev_lock\"\n    PARAM_CONFIG = \"config\"\n    PARAM_REMOTE = \"remote\"\n\n    REPO_SCHEMA: ClassVar[dict] = {\n        PARAM_REPO: {\n            vol.Required(PARAM_URL): str,\n            PARAM_REV: str,\n            PARAM_REV_LOCK: str,\n            PARAM_CONFIG: vol.Any(str, dict),\n            PARAM_REMOTE: vol.Any(str, dict),\n        }\n    }\n\n    def __init__(self, def_repo: dict[str, Any], stage: \"Stage\", *args, **kwargs):\n        self.def_repo = def_repo\n        super().__init__(stage, *args, **kwargs)\n\n        self.fs = self._make_fs()\n        self.fs_path = as_posix(self.fs.normpath(self.def_path))\n\n    def _parse_path(self, fs, fs_path):  # noqa: ARG002\n        return None\n\n    @property\n    def is_in_repo(self):\n        return False\n\n    def __str__(self):\n        return f\"{self.def_path} ({self.def_repo[self.PARAM_URL]})\"\n\n    def workspace_status(self):\n        current = self._make_fs(locked=True).repo.get_rev()\n        updated = self._make_fs(locked=False).repo.get_rev()\n\n        if current != updated:\n            return {str(self): \"update available\"}\n\n        return {}\n\n    def status(self):\n        return self.workspace_status()\n\n    def save(self):\n        rev = self.fs.repo.get_rev()\n        if self.def_repo.get(self.PARAM_REV_LOCK) is None:\n            self.def_repo[self.PARAM_REV_LOCK] = rev\n\n    @classmethod\n    def _dump_def_repo(cls, def_repo) -> dict[str, str]:\n        repo = 
{cls.PARAM_URL: def_repo[cls.PARAM_URL]}\n\n        rev = def_repo.get(cls.PARAM_REV)\n        if rev:\n            repo[cls.PARAM_REV] = def_repo[cls.PARAM_REV]\n\n        rev_lock = def_repo.get(cls.PARAM_REV_LOCK)\n        if rev_lock:\n            repo[cls.PARAM_REV_LOCK] = rev_lock\n\n        config = def_repo.get(cls.PARAM_CONFIG)\n        if config:\n            repo[cls.PARAM_CONFIG] = config\n\n        remote = def_repo.get(cls.PARAM_REMOTE)\n        if remote:\n            repo[cls.PARAM_REMOTE] = remote\n        return repo\n\n    def dumpd(self, **kwargs) -> dict[str, Union[str, dict[str, str]]]:\n        return {\n            self.PARAM_PATH: self.def_path,\n            self.PARAM_REPO: self._dump_def_repo(self.def_repo),\n        }\n\n    def download(self, to: \"Output\", jobs: Optional[int] = None):\n        from dvc.fs import LocalFileSystem\n\n        files = super().download(to=to, jobs=jobs)\n        if not isinstance(to.fs, LocalFileSystem):\n            return\n\n        hashes: list[tuple[str, HashInfo, dict[str, Any]]] = []\n        for src_path, dest_path, maybe_info in files:\n            try:\n                info = maybe_info or self.fs.info(src_path)\n                hash_info = info[\"dvc_info\"][\"entry\"].hash_info\n                dest_info = to.fs.info(dest_path)\n            except (KeyError, AttributeError):\n                # If no hash info found, just keep going and output will be hashed later\n                continue\n            if hash_info:\n                hashes.append((dest_path, hash_info, dest_info))\n        cache = to.cache if to.use_cache else to.local_cache\n        cache.state.save_many(hashes, to.fs)\n\n    def update(self, rev: Optional[str] = None):\n        if rev:\n            self.def_repo[self.PARAM_REV] = rev\n        self.fs = self._make_fs(rev=rev, locked=False)\n        self.def_repo[self.PARAM_REV_LOCK] = self.fs.repo.get_rev()\n\n    def changed_checksum(self) -> bool:\n        # From current repo 
point of view what describes RepoDependency is its\n        # origin project url and rev_lock, and it makes RepoDependency\n        # immutable, hence it's impossible for checksum to change.\n        return False\n\n    def _make_fs(\n        self, rev: Optional[str] = None, locked: bool = True\n    ) -> \"DVCFileSystem\":\n        from dvc.config import Config\n        from dvc.fs import DVCFileSystem\n\n        rem = self.def_repo.get(\"remote\")\n        if isinstance(rem, dict):\n            remote = None\n            remote_config = rem\n        else:\n            remote = rem\n            remote_config = None\n\n        conf = self.def_repo.get(\"config\", {})\n        if isinstance(conf, dict):\n            config = deepcopy(conf)\n        else:\n            config = Config.load_file(conf)\n\n        # Setup config to the new DVCFileSystem to use the remote repo, but rely on the\n        # local cache instead of the remote's cache. This avoids re-streaming of data,\n        # but messes up the call to `_get_remote_config()` downstream, which will need\n        # to ignore cache parameters.\n        assert self.repo\n        config[\"cache\"] = self.repo.config[\"cache\"]\n        config[\"cache\"][\"dir\"] = self.repo.cache.local_cache_dir\n\n        return DVCFileSystem(\n            repo=self.def_repo[self.PARAM_URL],\n            rev=rev or self._get_rev(locked=locked),\n            subrepos=True,\n            config=config,\n            remote=remote,\n            remote_config=remote_config,\n        )\n\n    def _get_rev(self, locked: bool = True):\n        d = self.def_repo\n        return (d.get(self.PARAM_REV_LOCK) if locked else None) or d.get(self.PARAM_REV)\n"
  },
  {
    "path": "dvc/dirs.py",
    "content": "import os\nfrom typing import Optional\n\nimport platformdirs\n\nfrom . import env\n\nAPPNAME = \"dvc\"\nAPPAUTHOR = \"iterative\"\n\n\ndef system_config_dir():\n    return os.getenv(env.DVC_SYSTEM_CONFIG_DIR) or platformdirs.site_config_dir(\n        APPNAME, APPAUTHOR\n    )\n\n\ndef global_config_dir():\n    return os.getenv(env.DVC_GLOBAL_CONFIG_DIR) or platformdirs.user_config_dir(\n        APPNAME, APPAUTHOR\n    )\n\n\ndef site_cache_dir(config_site_cache_dir: Optional[str] = None):\n    from platformdirs import PlatformDirs\n    from platformdirs.unix import Unix\n\n    if dvc_site_cache_dir := os.getenv(env.DVC_SITE_CACHE_DIR):\n        return dvc_site_cache_dir\n\n    if config_site_cache_dir is not None:\n        return config_site_cache_dir\n\n    if issubclass(Unix, PlatformDirs):\n        # Return the cache directory shared by users, e.g. `/var/tmp/$appname`\n        # NOTE: platformdirs>=5 changed `site_cache_dir` to return /var/cache/$appname.\n        # as the following path is considered insecure.\n        # For details, see: https://github.com/tox-dev/platformdirs/pull/239\n\n        # FIXME: keeping the old behavior temporarily to avoid dependency conflict.\n        #        In the future, consider migrating to a more secure directory.\n        return f\"/var/tmp/{APPNAME}\"  # noqa: S108\n\n    return platformdirs.site_cache_dir(APPNAME, APPAUTHOR, opinion=True)\n"
  },
  {
    "path": "dvc/dvcfile.py",
    "content": "import contextlib\nimport os\nfrom typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, TypeVar, Union\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.stage import serialize\nfrom dvc.stage.exceptions import (\n    StageFileBadNameError,\n    StageFileDoesNotExistError,\n    StageFileIsNotDvcFileError,\n)\nfrom dvc.utils import relpath\nfrom dvc.utils.collections import apply_diff\nfrom dvc.utils.objects import cached_property\nfrom dvc.utils.serialize import dump_yaml, modify_yaml\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.types import StrOrBytesPath\n\n    from .parsing import DataResolver\n    from .stage import Stage\n\nlogger = logger.getChild(__name__)\n_T = TypeVar(\"_T\")\n\nDVC_FILE_SUFFIX = \".dvc\"\nPROJECT_FILE = \"dvc.yaml\"\nLOCK_FILE = \"dvc.lock\"\n\n\nclass FileIsGitIgnored(DvcException):\n    def __init__(self, path, pipeline_file=False):\n        super().__init__(\n            \"{}'{}' is git-ignored.\".format(\n                \"bad DVC file name \" if pipeline_file else \"\", path\n            )\n        )\n\n\nclass ParametrizedDumpError(DvcException):\n    pass\n\n\ndef is_valid_filename(path):\n    return path.endswith(DVC_FILE_SUFFIX) or os.path.basename(path) == PROJECT_FILE\n\n\ndef is_dvc_file(path):\n    return os.path.isfile(path) and (is_valid_filename(path) or is_lock_file(path))\n\n\ndef is_lock_file(path):\n    return os.path.basename(path) == LOCK_FILE\n\n\ndef is_git_ignored(repo, path):\n    from dvc.fs import LocalFileSystem\n    from dvc.scm import NoSCMError\n\n    try:\n        return isinstance(repo.fs, LocalFileSystem) and repo.scm.is_ignored(path)\n    except NoSCMError:\n        return False\n\n\ndef check_dvcfile_path(repo, path):\n    if not is_valid_filename(path):\n        raise StageFileBadNameError(\n            f\"bad DVC file name '{relpath(path)}'. 
DVC files should be named \"\n            f\"'{PROJECT_FILE}' or have a '.dvc' suffix \"\n            f\"(e.g. '{os.path.basename(path)}.dvc').\"\n        )\n\n    if is_git_ignored(repo, path):\n        raise FileIsGitIgnored(relpath(path), True)\n\n\nclass FileMixin:\n    SCHEMA: Callable[[_T], _T]\n\n    def __init__(self, repo, path, verify=True, **kwargs):\n        self.repo = repo\n        self.path = path\n        self.verify = verify\n\n    def __repr__(self):\n        return f\"{self.__class__.__name__}: {relpath(self.path, self.repo.root_dir)}\"\n\n    def __hash__(self):\n        return hash(self.path)\n\n    def __eq__(self, other):\n        return self.repo == other.repo and os.path.abspath(\n            self.path\n        ) == os.path.abspath(other.path)\n\n    def __str__(self):\n        return f\"{self.__class__.__name__}: {self.relpath}\"\n\n    @property\n    def relpath(self):\n        return relpath(self.path)\n\n    def exists(self):\n        is_ignored = self.repo.dvcignore.is_ignored_file(self.path)\n        return self.repo.fs.exists(self.path) and not is_ignored\n\n    def _is_git_ignored(self):\n        return is_git_ignored(self.repo, self.path)\n\n    def _verify_filename(self):\n        if self.verify:\n            check_dvcfile_path(self.repo, self.path)\n\n    def _check_gitignored(self):\n        if self._is_git_ignored():\n            raise FileIsGitIgnored(self.path)\n\n    def load(self, **kwargs: Any) -> Any:\n        d, _ = self._load(**kwargs)\n        return d\n\n    def _load(self, **kwargs: Any) -> tuple[Any, str]:\n        # it raises the proper exceptions by priority:\n        # 1. when the file doesn't exists\n        # 2. filename is not a DVC file\n        # 3. path doesn't represent a regular file\n        # 4. 
when the file is git ignored\n        if not self.exists():\n            dvc_ignored = self.repo.dvcignore.is_ignored_file(self.path)\n            raise StageFileDoesNotExistError(self.path, dvc_ignored=dvc_ignored)\n\n        self._verify_filename()\n        if not self.repo.fs.isfile(self.path):\n            raise StageFileIsNotDvcFileError(self.path)\n\n        self._check_gitignored()\n        return self._load_yaml(**kwargs)\n\n    @classmethod\n    def validate(cls, d: _T, fname: Optional[str] = None) -> _T:\n        from dvc.utils.strictyaml import validate\n\n        return validate(d, cls.SCHEMA, path=fname)  # type: ignore[arg-type]\n\n    def _load_yaml(self, **kwargs: Any) -> tuple[Any, str]:\n        from dvc.utils import strictyaml\n\n        return strictyaml.load(\n            self.path,\n            self.SCHEMA,  # type: ignore[arg-type]\n            self.repo.fs,\n            **kwargs,\n        )\n\n    def remove(self, force=False):  # noqa: ARG002\n        with contextlib.suppress(FileNotFoundError):\n            os.unlink(self.path)\n\n    def dump(self, stage, **kwargs):\n        raise NotImplementedError\n\n    def dump_stages(self, stages, **kwargs):\n        raise NotImplementedError\n\n    def merge(self, ancestor, other, allowed=None):\n        raise NotImplementedError\n\n\nclass SingleStageFile(FileMixin):\n    from dvc.schema import COMPILED_SINGLE_STAGE_SCHEMA as SCHEMA\n    from dvc.stage.loader import SingleStageLoader as LOADER  # noqa: N814\n\n    datasets: ClassVar[list[dict[str, Any]]] = []\n    datasets_lock: ClassVar[list[dict[str, Any]]] = []\n    metrics: ClassVar[list[str]] = []\n    plots: ClassVar[Any] = {}\n    params: ClassVar[list[str]] = []\n    artifacts: ClassVar[dict[str, Optional[dict[str, Any]]]] = {}\n\n    @property\n    def stage(self) -> \"Stage\":\n        data, raw = self._load()\n        return self.LOADER.load_stage(self, data, raw)\n\n    @property\n    def stages(self) -> LOADER:\n        data, raw = 
self._load()\n        return self.LOADER(self, data, raw)\n\n    def dump(self, stage, **kwargs) -> None:\n        \"\"\"Dumps given stage appropriately in the dvcfile.\"\"\"\n        from dvc.stage import PipelineStage\n\n        assert not isinstance(stage, PipelineStage)\n        if self.verify:\n            check_dvcfile_path(self.repo, self.path)\n        logger.debug(\"Saving information to '%s'.\", relpath(self.path))\n        dump_yaml(self.path, serialize.to_single_stage_file(stage, **kwargs))\n        self.repo.scm_context.track_file(self.relpath)\n\n    def dump_stages(self, stages, **kwargs) -> None:\n        if not stages:\n            return None\n\n        assert len(stages) == 1, \"SingleStageFile can only dump one stage.\"\n        return self.dump(stages[0], **kwargs)\n\n    def remove_stage(self, stage):  # noqa: ARG002\n        self.remove()\n\n    def merge(self, ancestor, other, allowed=None):\n        assert isinstance(ancestor, SingleStageFile)\n        assert isinstance(other, SingleStageFile)\n\n        stage = self.stage\n        stage.merge(ancestor.stage, other.stage, allowed=allowed)\n        self.dump(stage)\n\n\nclass ProjectFile(FileMixin):\n    \"\"\"Abstraction for pipelines file, .yaml + .lock combined.\"\"\"\n\n    from dvc.schema import COMPILED_MULTI_STAGE_SCHEMA as SCHEMA\n    from dvc.stage.loader import StageLoader as LOADER  # noqa: N814\n\n    @property\n    def _lockfile(self):\n        return Lockfile(self.repo, os.path.splitext(self.path)[0] + \".lock\")\n\n    def _reset(self):\n        self.__dict__.pop(\"contents\", None)\n        self.__dict__.pop(\"lockfile_contents\", None)\n        self.__dict__.pop(\"resolver\", None)\n        self.__dict__.pop(\"stages\", None)\n\n    def dump(self, stage, update_pipeline=True, update_lock=True, **kwargs):\n        \"\"\"Dumps given stage appropriately in the dvcfile.\"\"\"\n        return self.dump_stages(\n            [stage], update_pipeline=update_pipeline, 
update_lock=update_lock, **kwargs\n        )\n\n    def dump_stages(self, stages, update_pipeline=True, update_lock=True, **kwargs):\n        from dvc.stage import PipelineStage\n\n        if not stages:\n            return\n\n        for stage in stages:\n            assert isinstance(stage, PipelineStage)\n\n        if self.verify:\n            check_dvcfile_path(self.repo, self.path)\n\n        if update_pipeline:\n            self._dump_pipeline_file(stages)\n\n        if update_lock:\n            self._dump_lockfile(stages, **kwargs)\n\n    def dump_dataset(self, dataset):\n        with modify_yaml(self.path, fs=self.repo.fs) as data:\n            parsed = self.datasets if data else []\n            raw = data.setdefault(\"datasets\", [])\n            loc = next(\n                (i for i, ds in enumerate(parsed) if ds[\"name\"] == dataset[\"name\"]),\n                None,\n            )\n            if loc is not None:\n                if raw[loc] != parsed[loc]:\n                    raise ParametrizedDumpError(\n                        \"cannot update a parametrized dataset entry\"\n                    )\n\n                apply_diff(dataset, raw[loc])\n                raw[loc] = dataset\n            else:\n                raw.append(dataset)\n        self.repo.scm_context.track_file(self.relpath)\n\n    def _dump_lockfile(self, stages, **kwargs):\n        self._lockfile.dump_stages(stages, **kwargs)\n\n    @staticmethod\n    def _check_if_parametrized(stage, action: str = \"dump\") -> None:\n        if stage.raw_data.parametrized:\n            raise ParametrizedDumpError(f\"cannot {action} a parametrized {stage}\")\n\n    def _dump_pipeline_file(self, stages):\n        stages = stages if isinstance(stages, list) else [stages]\n        if not stages:\n            return\n\n        for stage in stages:\n            self._check_if_parametrized(stage)\n\n        with modify_yaml(self.path, fs=self.repo.fs) as data:\n            if not data:\n                
logger.info(\"Creating '%s'\", self.relpath)\n\n            data[\"stages\"] = data.get(\"stages\", {})\n            for stage in stages:\n                stage_data = serialize.to_pipeline_file(stage)\n                existing_entry = stage.name in data[\"stages\"]\n                action = \"Modifying\" if existing_entry else \"Adding\"\n                logger.info(\"%s stage '%s' in '%s'\", action, stage.name, self.relpath)\n                if existing_entry:\n                    orig_stage_data = data[\"stages\"][stage.name]\n                    apply_diff(stage_data[stage.name], orig_stage_data)\n                else:\n                    data[\"stages\"].update(stage_data)\n\n        self.repo.scm_context.track_file(self.relpath)\n\n    @property\n    def stage(self):\n        raise DvcException(\"ProjectFile has multiple stages. Please specify it's name.\")\n\n    @cached_property\n    def contents(self) -> dict[str, Any]:\n        return self._load()[0]\n\n    @cached_property\n    def lockfile_contents(self) -> dict[str, Any]:\n        return self._lockfile.load()\n\n    @cached_property\n    def resolver(self) -> \"DataResolver\":\n        from .parsing import DataResolver\n\n        wdir = self.repo.fs.parent(self.path)\n        return DataResolver(self.repo, wdir, self.contents)\n\n    @cached_property\n    def stages(self) -> LOADER:\n        return self.LOADER(self, self.contents, self.lockfile_contents)\n\n    @property\n    def artifacts(self) -> dict[str, Optional[dict[str, Any]]]:\n        return self.resolver.resolve_artifacts()\n\n    @property\n    def metrics(self) -> list[str]:\n        return self.resolver.resolve_metrics()\n\n    @property\n    def params(self) -> list[str]:\n        return self.resolver.resolve_params()\n\n    @property\n    def plots(self) -> list[Any]:\n        return self.resolver.resolve_plots()\n\n    @property\n    def datasets(self) -> list[dict[str, Any]]:\n        return self.resolver.resolve_datasets()\n\n    
@property\n    def datasets_lock(self) -> list[dict[str, Any]]:\n        return self.lockfile_contents.get(\"datasets\", [])\n\n    def remove(self, force=False):\n        if not force:\n            logger.warning(\"Cannot remove pipeline file.\")\n            return\n\n        super().remove()\n        self._lockfile.remove()\n\n    def remove_stage(self, stage):\n        self._check_if_parametrized(stage, \"remove\")\n        self._lockfile.remove_stage(stage)\n        if not self.exists():\n            return\n\n        d, _ = self._load_yaml(round_trip=True)\n        if stage.name not in d.get(\"stages\", {}):\n            return\n\n        logger.debug(\"Removing '%s' from '%s'\", stage.name, self.path)\n        del d[\"stages\"][stage.name]\n\n        if d[\"stages\"]:\n            dump_yaml(self.path, d)\n        else:\n            super().remove()\n\n    def merge(self, ancestor, other, allowed=None):\n        raise NotImplementedError\n\n\nclass Lockfile(FileMixin):\n    from dvc.schema import COMPILED_LOCKFILE_SCHEMA as SCHEMA\n\n    def _verify_filename(self):\n        pass  # lockfile path is hardcoded, so no need to verify here\n\n    def _load(self, **kwargs: Any):\n        try:\n            return super()._load(**kwargs)\n        except StageFileDoesNotExistError:\n            # we still need to account for git-ignored dvc.lock file\n            # even though it may not exist or have been .dvcignored\n            self._check_gitignored()\n            return {}, \"\"\n\n    def dump_dataset(self, dataset: dict):\n        with modify_yaml(self.path, fs=self.repo.fs) as data:\n            data.update({\"schema\": \"2.0\"})\n            if not data:\n                logger.info(\"Generating lock file '%s'\", self.relpath)\n\n            datasets: list[dict] = data.setdefault(\"datasets\", [])\n            loc = next(\n                (i for i, ds in enumerate(datasets) if ds[\"name\"] == dataset[\"name\"]),\n                None,\n            )\n         
   if loc is not None:\n                datasets[loc] = dataset\n            else:\n                datasets.append(dataset)\n            data.setdefault(\"stages\", {})\n        self.repo.scm_context.track_file(self.relpath)\n\n    def dump_stages(self, stages, **kwargs):\n        if not stages:\n            return\n\n        is_modified = False\n        log_updated = False\n        with modify_yaml(self.path, fs=self.repo.fs) as data:\n            if not data:\n                data.update({\"schema\": \"2.0\"})\n                # order is important, meta should always be at the top\n                logger.info(\"Generating lock file '%s'\", self.relpath)\n\n            data[\"stages\"] = data.get(\"stages\", {})\n            for stage in stages:\n                stage_data = serialize.to_lockfile(stage, **kwargs)\n                modified = data[\"stages\"].get(stage.name, {}) != stage_data.get(\n                    stage.name, {}\n                )\n                if modified:\n                    is_modified = True\n                    if not log_updated:\n                        logger.info(\"Updating lock file '%s'\", self.relpath)\n                        log_updated = True\n                data[\"stages\"].update(stage_data)\n\n        if is_modified:\n            self.repo.scm_context.track_file(self.relpath)\n\n    def dump(self, stage, **kwargs):\n        self.dump_stages([stage], **kwargs)\n\n    def remove_stage(self, stage):\n        if not self.exists():\n            return\n\n        d, _ = self._load_yaml(round_trip=True)\n        data = d.get(\"stages\", {})\n        if stage.name not in data:\n            return\n\n        logger.debug(\"Removing '%s' from '%s'\", stage.name, self.path)\n        del data[stage.name]\n\n        if data:\n            dump_yaml(self.path, d)\n        else:\n            self.remove()\n\n    def merge(self, ancestor, other, allowed=None):\n        raise NotImplementedError\n\n\ndef load_file(\n    repo: \"Repo\", 
path: \"StrOrBytesPath\", **kwargs: Any\n) -> Union[ProjectFile, SingleStageFile]:\n    _, ext = os.path.splitext(path)\n    if ext in (\".yaml\", \".yml\"):\n        return ProjectFile(repo, path, **kwargs)\n    return SingleStageFile(repo, path, **kwargs)\n"
  },
  {
    "path": "dvc/env.py",
    "content": "DVC_ANALYTICS_ENDPOINT = \"DVC_ANALYTICS_ENDPOINT\"\nDVC_DAEMON = \"DVC_DAEMON\"\nDVC_DAEMON_LOGFILE = \"DVC_DAEMON_LOGFILE\"\nDVC_EXP_AUTO_PUSH = \"DVC_EXP_AUTO_PUSH\"\nDVC_EXP_BASELINE_REV = \"DVC_EXP_BASELINE_REV\"\nDVC_EXP_GIT_REMOTE = \"DVC_EXP_GIT_REMOTE\"\nDVC_EXP_NAME = \"DVC_EXP_NAME\"\nDVC_GLOBAL_CONFIG_DIR = \"DVC_GLOBAL_CONFIG_DIR\"\nDVC_IGNORE_ISATTY = \"DVC_IGNORE_ISATTY\"\nDVC_NO_ANALYTICS = \"DVC_NO_ANALYTICS\"\nDVC_PAGER = \"DVC_PAGER\"\nDVC_ROOT = \"DVC_ROOT\"\nDVC_SHOW_TRACEBACK = \"DVC_SHOW_TRACEBACK\"\nDVC_SITE_CACHE_DIR = \"DVC_SITE_CACHE_DIR\"\nDVC_STUDIO_OFFLINE = \"DVC_STUDIO_OFFLINE\"\nDVC_STUDIO_REPO_URL = \"DVC_STUDIO_REPO_URL\"\nDVC_STUDIO_TOKEN = \"DVC_STUDIO_TOKEN\"  # noqa: S105\nDVC_STUDIO_URL = \"DVC_STUDIO_URL\"\nDVC_SQLALCHEMY_ECHO = \"DVC_SQLALCHEMY_ECHO\"\nDVC_SYSTEM_CONFIG_DIR = \"DVC_SYSTEM_CONFIG_DIR\"\nDVC_UPDATER_ENDPOINT = \"DVC_UPDATER_ENDPOINT\"\nDVC_STAGE = \"DVC_STAGE\"\n"
  },
  {
    "path": "dvc/exceptions.py",
    "content": "\"\"\"Exceptions raised by the dvc.\"\"\"\n\nimport errno\nfrom typing import TYPE_CHECKING, Optional\n\nfrom dvc.utils import format_link\n\nif TYPE_CHECKING:\n    from dvc.stage import Stage\n\n\nclass DvcException(Exception):  # noqa: N818\n    \"\"\"Base class for all dvc exceptions.\"\"\"\n\n    def __init__(self, msg, *args):\n        assert msg\n        self.msg = msg\n        super().__init__(msg, *args)\n\n\nclass InvalidArgumentError(ValueError, DvcException):\n    \"\"\"Thrown if arguments are invalid.\"\"\"\n\n    def __init__(self, msg, *args):\n        self.msg = msg\n        super().__init__(msg, *args)\n\n\nclass OutputDuplicationError(DvcException):\n    \"\"\"Thrown if a file/directory is specified as an output in more than one\n    stage.\n\n    Args:\n        output (unicode): path to the file/directory.\n        stages (list): list of paths to stages.\n    \"\"\"\n\n    def __init__(self, output: str, stages: set[\"Stage\"]):\n        from funcy import first\n\n        assert isinstance(output, str)\n        assert all(hasattr(stage, \"relpath\") for stage in stages)\n        if len(stages) == 1:\n            stage = first(stages)\n            msg = (\n                f\"output '{output}' is already specified in {stage}.\"\n                f\"\\nUse `dvc remove {stage.addressing}` to stop tracking the \"\n                \"overlapping output.\"\n            )\n        else:\n            stage_names = \"\\n\".join([\"\\t- \" + s.addressing for s in stages])\n            msg = (\n                f\"output '{output}' is specified in:\\n{stage_names}\"\n                \"\\nUse `dvc remove` with any of the above targets to stop tracking the \"\n                \"overlapping output.\"\n            )\n        super().__init__(msg)\n        self.stages = stages\n        self.output = output\n\n\nclass OutputNotFoundError(DvcException):\n    \"\"\"Thrown if a file/directory is not found as an output in any pipeline.\n\n    Args:\n       
 output (unicode): path to the file/directory.\n    \"\"\"\n\n    def __init__(self, output, repo=None):\n        from dvc.utils import relpath\n\n        self.output = output\n        self.repo = repo\n        super().__init__(\n            f\"Unable to find DVC file with output {relpath(self.output)!r}\"\n        )\n\n\nclass StageNotFoundError(DvcException):\n    pass\n\n\nclass StagePathAsOutputError(DvcException):\n    \"\"\"Thrown if directory that stage is going to be saved in is specified as\n    an output of another stage.\n\n    Args:\n        stage (Stage): a stage that is in some other stages output\n        output (str): an output covering the stage above\n    \"\"\"\n\n    def __init__(self, stage, output):\n        assert isinstance(output, str)\n        super().__init__(f\"{stage} is within an output {output!r} of another stage\")\n\n\nclass CircularDependencyError(DvcException):\n    \"\"\"Thrown if a file/directory specified both as an output and as a\n    dependency.\n\n    Args:\n        dependency (str): path to the dependency.\n    \"\"\"\n\n    def __init__(self, dependency):\n        assert isinstance(dependency, str)\n\n        msg = \"'{}' is specified as an output and as a dependency.\"\n        super().__init__(msg.format(dependency))\n\n\nclass ArgumentDuplicationError(DvcException):\n    \"\"\"Thrown if a file/directory is specified as a dependency/output more\n    than once.\n\n    Args:\n        path (str): path to the file/directory.\n    \"\"\"\n\n    def __init__(self, path):\n        assert isinstance(path, str)\n        super().__init__(f\"file '{path}' is specified more than once.\")\n\n\nclass MoveNotDataSourceError(DvcException):\n    \"\"\"Thrown when trying to move a file/directory that is not an output\n    in a data source stage.\n\n    Args:\n        path (str): path to the file/directory.\n    \"\"\"\n\n    def __init__(self, path):\n        msg = (\n            \"move is not permitted for stages that are not data 
sources. \"\n            f\"You need to either move {path!r} to a new location and edit \"\n            f\"it by hand, or remove {path!r} and create a new one at the \"\n            \"desired location.\"\n        )\n        super().__init__(msg)\n\n\nclass NotDvcRepoError(DvcException):\n    \"\"\"Thrown if a directory is not a DVC repo\"\"\"\n\n\nclass CyclicGraphError(DvcException):\n    def __init__(self, stages):\n        assert isinstance(stages, list)\n        stage_part = \"stage\" if len(stages) == 1 else \"stages\"\n        msg = (\n            \"Same item(s) are defined as both a dependency and an output \"\n            \"in {stage_part}: {stage}.\"\n        )\n        super().__init__(\n            msg.format(\n                stage_part=stage_part,\n                stage=\", \".join(s.addressing for s in stages),\n            )\n        )\n\n\nclass ConfirmRemoveError(DvcException):\n    def __init__(self, path):\n        super().__init__(\n            f\"unable to remove {path!r} without a confirmation. 
Use `-f` to force.\"\n        )\n\n\nclass InitError(DvcException):\n    pass\n\n\nclass ReproductionError(DvcException):\n    pass\n\n\nclass BadMetricError(DvcException):\n    def __init__(self, paths):\n        super().__init__(\n            \"the following metrics do not exist, \"\n            \"are not metrics files or are malformed: {paths}\".format(\n                paths=\", \".join(f\"'{path}'\" for path in paths)\n            )\n        )\n\n\nclass OverlappingOutputPathsError(DvcException):\n    def __init__(self, parent, overlapping_out, message):\n        self.parent = parent\n        self.overlapping_out = overlapping_out\n        super().__init__(message)\n\n\nclass CheckoutErrorSuggestGit(DvcException):\n    def __init__(self, target):\n        super().__init__(f\"Did you mean `git checkout {target}`?\")\n\n\nclass ETagMismatchError(DvcException):\n    def __init__(self, etag, cached_etag):\n        super().__init__(\n            \"ETag mismatch detected when copying file to cache! \"\n            f\"(expected: '{etag}', actual: '{cached_etag}')\"\n        )\n\n\nclass FileExistsLocallyError(FileExistsError, DvcException):\n    def __init__(self, path, hint=None):\n        import os.path\n\n        self.path = path\n        hint = \"\" if hint is None else f\". {hint}\"\n        path_typ = \"directory\" if os.path.isdir(path) else \"file\"\n        msg = f\"The {path_typ} '{path}' already exists locally{hint}\"\n        super().__init__(msg)\n        self.errno = errno.EEXIST\n\n\nclass FileMissingError(DvcException):\n    def __init__(self, path, hint=None):\n        self.path = path\n        hint = \"\" if hint is None else f\". 
{hint}\"\n        super().__init__(f\"Can't find '{path}' neither locally nor on remote{hint}\")\n\n\nclass FileTransferError(DvcException):\n    _METHOD = \"transfer\"\n\n    def __init__(self, amount):\n        self.amount = amount\n\n        super().__init__(f\"{amount} files failed to {self._METHOD}\")\n\n\nclass DownloadError(FileTransferError):\n    _METHOD = \"download\"\n\n\nclass UploadError(FileTransferError):\n    _METHOD = \"upload\"\n\n\nclass CheckoutError(DvcException):\n    def __init__(self, target_infos: list[str], result: dict):\n        from dvc.utils import error_link\n\n        self.target_infos = target_infos\n        self.result = result\n        targets = [str(t) for t in target_infos]\n        m = (\n            \"Checkout failed for following targets:\\n{}\\nIs your \"\n            \"cache up to date?\\n{}\".format(\n                \"\\n\".join(targets), error_link(\"missing-files\")\n            )\n        )\n        super().__init__(m)\n\n\nclass CollectCacheError(DvcException):\n    pass\n\n\nclass NoRemoteInExternalRepoError(DvcException):\n    def __init__(self, url):\n        super().__init__(f\"No DVC remote is specified in target repository '{url}'.\")\n\n\nclass NoOutputInExternalRepoError(DvcException):\n    def __init__(self, path, external_repo_path, external_repo_url):\n        from dvc.utils import relpath\n\n        super().__init__(\n            f\"Output {relpath(path, external_repo_path)!r} \"\n            f\"not found in target repository '{external_repo_url}'\"\n        )\n\n\nclass HTTPError(DvcException):\n    def __init__(self, code, reason):\n        super().__init__(f\"'{code} {reason}'\")\n\n\nclass PathMissingError(DvcException):\n    default_msg = (\n        \"The path '{}' does not exist in the target repository '{}'\"\n        \" neither as a DVC output nor as a Git-tracked file.\"\n    )\n    default_msg_dvc_only = (\n        \"The path '{}' does not exist in the target repository '{}' as an DVC output.\"\n 
   )\n\n    def __init__(self, path, repo, dvc_only=False):\n        msg = self.default_msg if not dvc_only else self.default_msg_dvc_only\n        super().__init__(msg.format(path, repo))\n        self.dvc_only = dvc_only\n\n\nclass URLMissingError(DvcException):\n    def __init__(self, url):\n        super().__init__(f\"The path '{url}' does not exist\")\n\n\nclass IsADirectoryError(DvcException):  # noqa: A001\n    \"\"\"Raised when a file operation is requested on a directory.\"\"\"\n\n\nclass NoOutputOrStageError(DvcException):\n    \"\"\"\n    Raised when the target is neither an output nor a stage name in dvc.yaml\n    \"\"\"\n\n    def __init__(self, target, file):\n        super().__init__(\n            f\"'{target}' does not exist as an output or a stage name in '{file}'\"\n        )\n\n\nclass MergeError(DvcException):\n    pass\n\n\nclass CacheLinkError(DvcException):\n    SUPPORT_LINK = \"See {} for more information.\".format(\n        format_link(\"https://dvc.org/doc/user-guide/troubleshooting#cache-types\")\n    )\n\n    def __init__(self, fs_paths):\n        msg = \"No possible cache link types for '{}'. 
{}\".format(\n            \", \".join(fs_paths), self.SUPPORT_LINK\n        )\n        super().__init__(msg)\n        self.fs_paths = fs_paths\n\n\nclass PrettyDvcException(DvcException):\n    def __pretty_exc__(self, **kwargs):\n        \"\"\"Print prettier exception message.\"\"\"\n\n\nclass ArtifactNotFoundError(DvcException):\n    \"\"\"Thrown if an artifact is not found in the DVC repo.\n\n    Args:\n        name (str): artifact name.\n    \"\"\"\n\n    def __init__(\n        self,\n        name: str,\n        version: Optional[str] = None,\n        stage: Optional[str] = None,\n    ):\n        self.name = name\n        self.version = version\n        self.stage = stage\n\n        desc = f\" @ {stage or version}\" if (stage or version) else \"\"\n        super().__init__(f\"Unable to find artifact '{name}{desc}'\")\n\n\nclass RevCollectionError(DvcException):\n    \"\"\"Thrown if a revision failed to be collected.\n\n    Args:\n        rev (str): revision that failed (or \"workspace\").\n    \"\"\"\n\n    def __init__(self, rev):\n        self.rev = rev\n        super().__init__(f\"Failed to collect '{rev}'\")\n"
  },
  {
    "path": "dvc/fs/__init__.py",
    "content": "import glob\nfrom itertools import repeat\nfrom typing import Optional\nfrom urllib.parse import urlparse\n\nfrom dvc.config import ConfigError as RepoConfigError\nfrom dvc.config_schema import SCHEMA, Invalid\nfrom dvc_http import HTTPFileSystem, HTTPSFileSystem  # noqa: F401\n\n# pylint: disable=unused-import\nfrom dvc_objects.fs import (  # noqa: F401\n    LocalFileSystem,\n    MemoryFileSystem,\n    Schemes,\n    generic,\n    get_fs_cls,\n    known_implementations,\n    localfs,\n    registry,\n    system,\n    utils,\n)\nfrom dvc_objects.fs.base import AnyFSPath, FileSystem  # noqa: F401, TC001\nfrom dvc_objects.fs.errors import (  # noqa: F401\n    AuthError,\n    ConfigError,\n    RemoteMissingDepsError,\n)\n\nfrom .callbacks import Callback  # noqa: F401\nfrom .data import DataFileSystem  # noqa: F401\nfrom .dvc import DVCFileSystem\nfrom .git import GitFileSystem  # noqa: F401\n\nknown_implementations.update(\n    {\n        \"dvc\": {\n            \"class\": \"dvc.fs.dvc.DVCFileSystem\",\n            \"err\": \"dvc is supported, but requires 'dvc' to be installed\",\n        },\n        \"git\": {\n            \"class\": \"dvc.fs.git.GitFileSystem\",\n            \"err\": \"git is supported, but requires 'dvc' to be installed\",\n        },\n    }\n)\n\n\ndef download(\n    fs: \"FileSystem\", fs_path: str, to: str, jobs: Optional[int] = None\n) -> list[tuple[str, str, Optional[dict]]]:\n    from dvc.scm import lfs_prefetch\n\n    from .callbacks import TqdmCallback\n\n    with TqdmCallback(desc=f\"Downloading {fs.name(fs_path)}\", unit=\"files\") as cb:\n        if isinstance(fs, DVCFileSystem):\n            lfs_prefetch(\n                fs,\n                [\n                    f\"{fs.normpath(glob.escape(fs_path))}/**\"\n                    if fs.isdir(fs_path)\n                    else glob.escape(fs_path)\n                ],\n            )\n            if not glob.has_magic(fs_path):\n                return fs._get(fs_path, to, 
batch_size=jobs, callback=cb)\n\n        # NOTE: We use dvc-objects generic.copy over fs.get since it makes file\n        # download atomic and avoids fsspec glob/regex path expansion.\n        if fs.isdir(fs_path):\n            from_infos = [\n                path for path in fs.find(fs_path) if not path.endswith(fs.flavour.sep)\n            ]\n            if not from_infos:\n                localfs.makedirs(to, exist_ok=True)\n                return []\n            to_infos = [\n                localfs.join(to, *fs.relparts(info, fs_path)) for info in from_infos\n            ]\n        else:\n            from_infos = [fs_path]\n            to_infos = [to]\n\n        cb.set_size(len(from_infos))\n        jobs = jobs or fs.jobs\n        generic.copy(fs, from_infos, localfs, to_infos, callback=cb, batch_size=jobs)\n        return list(zip(from_infos, to_infos, repeat(None)))\n\n\ndef parse_external_url(url, fs_config=None, config=None):\n    remote_config = dict(fs_config) if fs_config else {}\n    remote_config[\"url\"] = url\n    fs_cls, resolved_fs_config, fs_path = get_cloud_fs(config, **remote_config)\n    fs = fs_cls(**resolved_fs_config)\n    return fs, fs_path\n\n\ndef get_fs_config(config, **kwargs):\n    name = kwargs.get(\"name\")\n    if name:\n        try:\n            remote_conf = config[\"remote\"][name.lower()]\n        except KeyError:\n            from dvc.config import RemoteNotFoundError\n\n            raise RemoteNotFoundError(f\"remote '{name}' doesn't exist\")  # noqa: B904\n    else:\n        remote_conf = kwargs\n    return _resolve_remote_refs(config, remote_conf)\n\n\ndef _resolve_remote_refs(config, remote_conf):\n    # Support for cross referenced remotes.\n    # This will merge the settings, shadowing base ref with remote_conf.\n    # For example, having:\n    #\n    #       dvc remote add server ssh://localhost\n    #       dvc remote modify server user root\n    #       dvc remote modify server ask_password true\n    #\n    #       
dvc remote add images remote://server/tmp/pictures\n    #       dvc remote modify images user alice\n    #       dvc remote modify images ask_password false\n    #       dvc remote modify images password asdf1234\n    #\n    # Results on a config dictionary like:\n    #\n    #       {\n    #           \"url\": \"ssh://localhost/tmp/pictures\",\n    #           \"user\": \"alice\",\n    #           \"password\": \"asdf1234\",\n    #           \"ask_password\": False,\n    #       }\n    parsed = urlparse(remote_conf[\"url\"])\n    if parsed.scheme != \"remote\":\n        return remote_conf\n\n    base = get_fs_config(config, name=parsed.netloc)\n    cls, _, _ = get_cloud_fs(config, **base)\n    relpath = parsed.path.lstrip(\"/\").replace(\"/\", cls.sep)\n    url = cls.sep.join((base[\"url\"], relpath))\n    return {**base, **remote_conf, \"url\": url}\n\n\ndef get_cloud_fs(repo_config, **kwargs):\n    repo_config = repo_config or {}\n    core_config = repo_config.get(\"core\", {})\n\n    remote_conf = get_fs_config(repo_config, **kwargs)\n    try:\n        remote_conf = SCHEMA[\"remote\"][str](remote_conf)  # type: ignore[index]\n    except Invalid as exc:\n        raise RepoConfigError(str(exc)) from None\n\n    if \"checksum_jobs\" not in remote_conf:\n        checksum_jobs = core_config.get(\"checksum_jobs\")\n        if checksum_jobs:\n            remote_conf[\"checksum_jobs\"] = checksum_jobs\n\n    cls = get_fs_cls(remote_conf)\n\n    url = remote_conf.pop(\"url\")\n    if cls.protocol in [\"webdav\", \"webdavs\"]:\n        # For WebDAVFileSystem, provided url is the base path itself, so it\n        # should be treated as being a root path.\n        fs_path = cls.root_marker\n    else:\n        fs_path = cls._strip_protocol(url)\n\n    extras = cls._get_kwargs_from_urls(url)\n    conf = extras | remote_conf  # remote config takes priority\n    return cls, conf, fs_path\n"
  },
  {
    "path": "dvc/fs/callbacks.py",
    "content": "from contextlib import ExitStack\nfrom typing import TYPE_CHECKING, BinaryIO, Optional, Union\n\nfrom fsspec.callbacks import DEFAULT_CALLBACK, Callback  # noqa: F401\nfrom fsspec.callbacks import TqdmCallback as _TqdmCallback\n\nfrom dvc.progress import Tqdm\nfrom dvc.utils.objects import cached_property\n\nif TYPE_CHECKING:\n    from rich.progress import TaskID\n    from tqdm import tqdm\n\n    from dvc.ui._rich_progress import RichTransferProgress\n\n\nclass TqdmCallback(_TqdmCallback):\n    def __init__(\n        self,\n        size: Optional[int] = None,\n        value: int = 0,\n        progress_bar: Optional[\"tqdm\"] = None,\n        tqdm_cls: Optional[type[\"tqdm\"]] = None,\n        **tqdm_kwargs,\n    ):\n        tqdm_kwargs.pop(\"total\", None)\n        super().__init__(\n            tqdm_kwargs=tqdm_kwargs, tqdm_cls=tqdm_cls or Tqdm, size=size, value=value\n        )\n        if progress_bar is not None:\n            self.tqdm = progress_bar\n\n    def branched(self, path_1: \"Union[str, BinaryIO]\", path_2: str, **kwargs):\n        desc = path_1 if isinstance(path_1, str) else path_2\n        return TqdmCallback(bytes=True, desc=desc)\n\n\nclass RichCallback(Callback):\n    def __init__(\n        self,\n        size: Optional[int] = None,\n        value: int = 0,\n        progress: Optional[\"RichTransferProgress\"] = None,\n        desc: Optional[str] = None,\n        bytes: bool = False,  # noqa: A002\n        unit: Optional[str] = None,\n        disable: bool = False,\n        transient: bool = True,\n    ) -> None:\n        self._progress = progress\n        self.disable = disable\n        self._task_kwargs = {\n            \"description\": desc or \"\",\n            \"bytes\": bytes,\n            \"unit\": unit,\n            \"total\": size or 0,\n            \"visible\": False,\n            \"progress_type\": None if bytes else \"summary\",\n        }\n        self._transient = transient\n        self._stack = ExitStack()\n       
 super().__init__(size=size, value=value)\n\n    @cached_property\n    def progress(self) -> \"RichTransferProgress\":\n        from dvc.ui import ui\n        from dvc.ui._rich_progress import RichTransferProgress\n\n        if self._progress is not None:\n            return self._progress\n\n        progress = RichTransferProgress(\n            transient=self._transient,\n            disable=self.disable,\n            console=ui.error_console,\n        )\n        self._stack.enter_context(progress)\n        return progress\n\n    @cached_property\n    def task(self) -> \"TaskID\":\n        return self.progress.add_task(**self._task_kwargs)  # type: ignore[arg-type]\n\n    def close(self):\n        if self._transient:\n            self.progress.clear_task(self.task)\n        self._stack.close()\n\n    def call(self, hook_name=None, **kwargs):  # noqa: ARG002\n        self.progress.update(\n            self.task,\n            completed=self.value,\n            total=self.size,\n            visible=not self.disable,\n        )\n\n    def branched(self, path_1: Union[str, BinaryIO], path_2: str, **kwargs):\n        return RichCallback(\n            progress=self.progress,\n            desc=path_1 if isinstance(path_1, str) else path_2,\n            bytes=True,\n            transient=self._transient,\n        )\n"
  },
  {
    "path": "dvc/fs/data.py",
    "content": "import functools\nimport os\nfrom typing import TYPE_CHECKING\n\nfrom dvc.log import logger\nfrom dvc.utils import as_posix\nfrom dvc_objects.fs.base import FileSystem\n\nif TYPE_CHECKING:\n    from dvc_data.fs import DataFileSystem as _DataFileSystem\n\n\nlogger = logger.getChild(__name__)\n\n\nclass DataFileSystem(FileSystem):\n    protocol = \"local\"\n\n    PARAM_CHECKSUM = \"md5\"\n\n    def _prepare_credentials(self, **config):\n        return config\n\n    @functools.cached_property\n    def fs(self) -> \"_DataFileSystem\":\n        from dvc_data.fs import DataFileSystem as _DataFileSystem\n\n        return _DataFileSystem(**self.fs_args)\n\n    def getcwd(self):\n        return self.fs.getcwd()\n\n    def isdvc(self, path, **kwargs):\n        return self.fs.isdvc(path, **kwargs)\n\n    def from_os_path(self, path):\n        if os.path.isabs(path):\n            path = os.path.splitdrive(path)[1]\n\n        return as_posix(path)\n"
  },
  {
    "path": "dvc/fs/dvc.py",
    "content": "import errno\nimport functools\nimport ntpath\nimport os\nimport posixpath\nimport threading\nfrom collections import defaultdict, deque\nfrom contextlib import ExitStack, nullcontext, suppress\nfrom glob import has_magic\nfrom typing import TYPE_CHECKING, Any, Callable, Optional, Union\n\nfrom fsspec.spec import DEFAULT_CALLBACK, AbstractFileSystem\nfrom funcy import wrap_with\n\nfrom dvc.log import logger\nfrom dvc.utils.threadpool import ThreadPoolExecutor\nfrom dvc_objects.fs.base import AnyFSPath, FileSystem\n\nfrom .data import DataFileSystem\n\nif TYPE_CHECKING:\n    from contextlib import AbstractContextManager\n\n    from dvc.repo import Repo\n    from dvc.types import DictStrAny, StrPath\n\n    from .callbacks import Callback\n\nlogger = logger.getChild(__name__)\n\nRepoFactory = Union[Callable[..., \"Repo\"], type[\"Repo\"]]\nKey = tuple[str, ...]\n\n\ndef as_posix(path: str) -> str:\n    return path.replace(ntpath.sep, posixpath.sep)\n\n\n# NOT the same as dvc.dvcfile.is_dvc_file()!\ndef _is_dvc_file(fname):\n    from dvc.dvcfile import is_valid_filename\n    from dvc.ignore import DvcIgnore\n\n    return is_valid_filename(fname) or fname == DvcIgnore.DVCIGNORE_FILE\n\n\ndef _merge_info(repo, key, fs_info, dvc_info):\n    from . 
import utils\n\n    ret = {\"repo\": repo}\n\n    if dvc_info:\n        dvc_info[\"isout\"] = any(\n            (len(out_key) <= len(key) and key[: len(out_key)] == out_key)\n            for out_key in repo.index.data_keys[\"repo\"]\n        )\n        dvc_info[\"isdvc\"] = dvc_info[\"isout\"]\n        ret[\"dvc_info\"] = dvc_info\n        ret[\"type\"] = dvc_info[\"type\"]\n        ret[\"size\"] = dvc_info[\"size\"]\n        if not fs_info and \"md5\" in dvc_info:\n            ret[\"md5\"] = dvc_info[\"md5\"]\n        if not fs_info and \"md5-dos2unix\" in dvc_info:\n            ret[\"md5-dos2unix\"] = dvc_info[\"md5-dos2unix\"]\n\n    if fs_info:\n        ret[\"type\"] = fs_info[\"type\"]\n        ret[\"size\"] = fs_info[\"size\"]\n        ret[\"fs_info\"] = fs_info\n        isexec = False\n        if fs_info[\"type\"] == \"file\":\n            isexec = utils.is_exec(fs_info[\"mode\"])\n        ret[\"isexec\"] = isexec\n\n    return ret\n\n\ndef _get_dvc_path(dvc_fs, subkey):\n    return dvc_fs.join(*subkey) if subkey else \"\"\n\n\nclass _DVCFileSystem(AbstractFileSystem):\n    cachable = False\n    root_marker = \"/\"\n\n    def __init__(\n        self,\n        repo: Union[\"Repo\", os.PathLike[str], str, None] = None,\n        rev: Optional[str] = None,\n        subrepos: bool = False,\n        repo_factory: Optional[RepoFactory] = None,\n        fo: Optional[str] = None,\n        target_options: Optional[dict[str, Any]] = None,  # noqa: ARG002\n        target_protocol: Optional[str] = None,  # noqa: ARG002\n        config: Optional[\"DictStrAny\"] = None,\n        remote: Optional[str] = None,\n        remote_config: Optional[\"DictStrAny\"] = None,\n        **kwargs,\n    ) -> None:\n        \"\"\"DVC + git-tracked files fs.\n\n        Args:\n            repo (str | os.PathLike[str] | Repo, optional): A url or a path to a DVC/Git\n                repository, or a `Repo` instance.\n                Defaults to a DVC repository in the current working 
directory.\n                Both HTTP and SSH protocols are supported for remote Git repos\n                (e.g. [user@]server:project.git).\n            rev (str, optional): Any Git revision such as a branch or tag name,\n                a commit hash or a dvc experiment name.\n                Defaults to the default branch in case of remote repositories.\n                In case of a local repository, if rev is unspecified, it will\n                default to the working directory.\n                If the repo is not a Git repo, this option is ignored.\n            subrepos (bool): traverse to subrepos.\n                By default, it ignores subrepos.\n            repo_factory (callable): A function to initialize subrepo with.\n                The default is `Repo`.\n            config (dict): Repo config to be passed into `repo_factory`.\n            remote (str): Remote name to be passed into `repo_factory`.\n            remote_config(dict): Remote config to be passed into `repo_factory`.\n\n        Examples:\n            - Opening a filesystem from repo in current working directory\n\n            >>> fs = DVCFileSystem()\n\n            - Opening a filesystem from local repository\n\n            >>> fs = DVCFileSystem(\"path/to/local/repository\")\n\n            - Opening a remote repository\n\n            >>> fs = DVCFileSystem(\n            ...    \"https://github.com/iterative/example-get-started\",\n            ...    rev=\"main\",\n            ... 
)\n        \"\"\"\n        from dvc.repo import Repo\n\n        # kwargs.get(\"url\") is for maintaining backward compatibility\n        repo = repo or fo or kwargs.get(\"url\")\n        if isinstance(repo, Repo):\n            self._repo: Optional[Repo] = repo\n            url = None\n        else:\n            self._repo = None\n            url = os.fspath(repo) if repo else None\n\n        super().__init__()\n        self._repo_factory = repo_factory\n        self._traverse_subrepos = subrepos\n        self._repo_stack = ExitStack()\n        self._repo_kwargs = {\n            \"url\": url,\n            \"rev\": rev,\n            \"subrepos\": subrepos,\n            \"config\": config,\n            \"remote\": remote,\n            \"remote_config\": remote_config,\n        }\n\n    def getcwd(self):\n        relparts: tuple[str, ...] = ()\n        assert self.repo is not None\n        if self.repo.fs.isin(self.repo.fs.getcwd(), self.repo.root_dir):\n            relparts = self.repo.fs.relparts(self.repo.fs.getcwd(), self.repo.root_dir)\n        return self.root_marker + self.sep.join(relparts)\n\n    @classmethod\n    def join(cls, *parts: str) -> str:\n        return posixpath.join(*parts)\n\n    @classmethod\n    def parts(cls, path: str) -> tuple[str, ...]:\n        ret = []\n        while True:\n            path, part = posixpath.split(path)\n\n            if part:\n                ret.append(part)\n                continue\n\n            if path:\n                ret.append(path)\n\n            break\n\n        ret.reverse()\n\n        return tuple(ret)\n\n    def normpath(self, path: str) -> str:\n        return posixpath.normpath(path)\n\n    def abspath(self, path: str) -> str:\n        if not posixpath.isabs(path):\n            path = self.join(self.getcwd(), path)\n        return self.normpath(path)\n\n    def relpath(self, path: str, start: Optional[str] = None) -> str:\n        if start is None:\n            start = \".\"\n        return 
posixpath.relpath(self.abspath(path), start=self.abspath(start))\n\n    def relparts(self, path: str, start: Optional[str] = None) -> tuple[str, ...]:\n        return self.parts(self.relpath(path, start=start))\n\n    @functools.cached_property\n    def repo(self):\n        if self._repo:\n            return self._repo\n\n        repo = self._make_repo(**self._repo_kwargs)\n\n        self._repo_stack.enter_context(repo)\n        self._repo = repo\n        return repo\n\n    @functools.cached_property\n    def repo_factory(self):\n        if self._repo_factory:\n            return self._repo_factory\n\n        if self._repo:\n            from dvc.repo import Repo\n\n            return Repo\n\n        return self.repo._fs_conf[\"repo_factory\"]\n\n    @functools.cached_property\n    def fsid(self) -> str:\n        from fsspec.utils import tokenize\n\n        from dvc.scm import NoSCM\n\n        return \"dvcfs_\" + tokenize(\n            self.repo.url or self.repo.root_dir,\n            self.repo.get_rev() if not isinstance(self.repo.scm, NoSCM) else None,\n        )\n\n    def _get_key(self, path: \"StrPath\") -> Key:\n        path = os.fspath(path)\n        parts = self.repo.fs.relparts(path, self.repo.root_dir)\n        if parts == (os.curdir,):\n            return ()\n        return parts\n\n    @functools.cached_property\n    def _subrepos_trie(self):\n        \"\"\"Keeps track of each and every path with the corresponding repo.\"\"\"\n\n        from pygtrie import Trie\n\n        trie = Trie()\n        key = self._get_key(self.repo.root_dir)\n        trie[key] = self.repo\n        return trie\n\n    def _get_key_from_relative(self, path) -> Key:\n        path = self._strip_protocol(path)\n        parts = self.relparts(path, self.root_marker)\n        if parts and parts[0] == os.curdir:\n            return parts[1:]\n        return parts\n\n    def _from_key(self, parts: Key) -> str:\n        return self.repo.fs.join(self.repo.root_dir, *parts)\n\n    
@functools.cached_property\n    def _datafss(self):\n        \"\"\"Keep a datafs instance of each repo.\"\"\"\n\n        datafss = {}\n\n        if hasattr(self.repo, \"dvc_dir\"):\n            key = self._get_key(self.repo.root_dir)\n            datafss[key] = DataFileSystem(index=self.repo.index.data[\"repo\"])\n\n        return datafss\n\n    @property\n    def repo_url(self):\n        return self.repo.url\n\n    @classmethod\n    def _make_repo(cls, **kwargs) -> \"Repo\":\n        from dvc.repo import Repo\n\n        with Repo.open(uninitialized=True, **kwargs) as repo:\n            return repo\n\n    def _get_repo(self, key: Key) -> \"Repo\":\n        \"\"\"Returns repo that the path falls in, using prefix.\n\n        If the path is already tracked/collected, it just returns the repo.\n\n        Otherwise, it collects the repos that might be in the path's parents\n        and then returns the appropriate one.\n        \"\"\"\n        repo = self._subrepos_trie.get(key)\n        if repo:\n            return repo\n\n        prefix_key, repo = self._subrepos_trie.longest_prefix(key)\n        dir_keys = (key[:i] for i in range(len(prefix_key) + 1, len(key) + 1))\n        self._update(dir_keys, starting_repo=repo)\n        return self._subrepos_trie.get(key) or self.repo\n\n    @wrap_with(threading.Lock())\n    def _update(self, dir_keys, starting_repo):\n        \"\"\"Checks for subrepo in directories and updates them.\"\"\"\n        repo = starting_repo\n        for key in dir_keys:\n            d = self._from_key(key)\n            if self._is_dvc_repo(d):\n                repo = self.repo_factory(\n                    d,\n                    fs=self.repo.fs,\n                    scm=self.repo.scm,\n                    repo_factory=self.repo_factory,\n                )\n                self._repo_stack.enter_context(repo)\n                self._datafss[key] = DataFileSystem(index=repo.index.data[\"repo\"])\n            self._subrepos_trie[key] = repo\n\n    def 
_is_dvc_repo(self, dir_path):\n        \"\"\"Check if the directory is a dvc repo.\"\"\"\n        if not self._traverse_subrepos:\n            return False\n\n        from dvc.repo import Repo\n\n        repo_path = self.repo.fs.join(dir_path, Repo.DVC_DIR)\n        return self.repo.fs.isdir(repo_path)\n\n    def _get_subrepo_info(\n        self, key: Key\n    ) -> tuple[\"Repo\", Optional[DataFileSystem], Key]:\n        \"\"\"\n        Returns information about the subrepo the key is part of.\n        \"\"\"\n        repo = self._get_repo(key)\n        repo_key: Key\n        if repo is self.repo:\n            repo_key = ()\n            subkey = key\n        else:\n            repo_key = self._get_key(repo.root_dir)\n            subkey = key[len(repo_key) :]\n\n        dvc_fs = self._datafss.get(repo_key)\n        return repo, dvc_fs, subkey\n\n    def _open(self, path, mode=\"rb\", **kwargs):\n        if mode != \"rb\":\n            raise OSError(errno.EROFS, os.strerror(errno.EROFS))\n\n        key = self._get_key_from_relative(path)\n        fs_path = self._from_key(key)\n        try:\n            return self.repo.fs.open(fs_path, mode=mode)\n        except FileNotFoundError:\n            _, dvc_fs, subkey = self._get_subrepo_info(key)\n            if not dvc_fs:\n                raise\n\n        dvc_path = _get_dvc_path(dvc_fs, subkey)\n        return dvc_fs.open(dvc_path, mode=mode, cache=kwargs.get(\"cache\", False))\n\n    def isdvc(self, path, **kwargs) -> bool:\n        \"\"\"Is this entry dvc-tracked?\"\"\"\n        try:\n            return self.info(path).get(\"dvc_info\", {}).get(\"isout\", False)\n        except FileNotFoundError:\n            return False\n\n    def ls(self, path, detail=True, dvc_only=False, **kwargs):  # noqa: C901, PLR0912\n        key = self._get_key_from_relative(path)\n        repo, dvc_fs, subkey = self._get_subrepo_info(key)\n\n        dvc_infos = {}\n        dvc_info = {}\n        if dvc_fs:\n            dvc_path = 
_get_dvc_path(dvc_fs, subkey)\n            with suppress(FileNotFoundError):\n                dvc_info = dvc_fs.info(dvc_path)\n                if dvc_info[\"type\"] == \"file\":\n                    dvc_infos[\"\"] = dvc_info\n                else:\n                    for info in dvc_fs.ls(dvc_path, detail=True):\n                        dvc_infos[dvc_fs.name(info[\"name\"])] = info\n\n        fs_infos = {}\n        fs_info = {}\n        ignore_subrepos = kwargs.get(\"ignore_subrepos\", True)\n        if not dvc_only:\n            fs = self.repo.fs\n            fs_path = self._from_key(key)\n            try:\n                fs_info = fs.info(fs_path)\n                if fs_info[\"type\"] == \"file\":\n                    fs_infos[\"\"] = fs_info\n                else:\n                    for info in repo.dvcignore.ls(\n                        fs, fs_path, detail=True, ignore_subrepos=ignore_subrepos\n                    ):\n                        fs_infos[fs.name(info[\"name\"])] = info\n            except (FileNotFoundError, NotADirectoryError):\n                pass\n\n        if not (fs_info or dvc_info):\n            # broken symlink or TreeError\n            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)\n\n        if fs_info and dvc_info and dvc_info[\"type\"] != fs_info[\"type\"]:\n            dvc_infos.clear()  # invalidate dvc_info if file type differs\n\n        dvcfiles = kwargs.get(\"dvcfiles\", False)\n\n        infos = []\n        paths = []\n        names = set(dvc_infos.keys()) | set(fs_infos.keys())\n\n        for name in names:\n            if not dvcfiles and _is_dvc_file(name):\n                continue\n\n            entry_path = self.join(path, name) if name else path\n            info = _merge_info(\n                repo, (*subkey, name), fs_infos.get(name), dvc_infos.get(name)\n            )\n            info[\"name\"] = entry_path\n            infos.append(info)\n            paths.append(entry_path)\n\n        
if not detail:\n            return paths\n\n        return infos\n\n    def info(self, path, **kwargs):\n        key = self._get_key_from_relative(path)\n        ignore_subrepos = kwargs.get(\"ignore_subrepos\", True)\n        return self._info(key, path, ignore_subrepos=ignore_subrepos)\n\n    def _info(  # noqa: C901\n        self, key, path, ignore_subrepos=True, check_ignored=True\n    ):\n        repo, dvc_fs, subkey = self._get_subrepo_info(key)\n\n        dvc_info = None\n        if dvc_fs:\n            try:\n                dvc_info = dvc_fs.fs.index.info(subkey)\n                dvc_path = _get_dvc_path(dvc_fs, subkey)\n                dvc_info[\"name\"] = dvc_path\n            except KeyError:\n                pass\n\n        fs_info = None\n        fs = self.repo.fs\n        fs_path = self._from_key(key)\n        try:\n            fs_info = fs.info(fs_path)\n            if check_ignored and repo.dvcignore.is_ignored(\n                fs, fs_path, ignore_subrepos=ignore_subrepos\n            ):\n                fs_info = None\n        except (FileNotFoundError, NotADirectoryError):\n            if not dvc_info:\n                raise\n\n        # NOTE: if some parent in fs_path turns out to be a file, it means\n        # that the whole repofs branch doesn't exist.\n        if dvc_info and not fs_info:\n            for parent in fs.parents(fs_path):\n                try:\n                    if fs.info(parent)[\"type\"] != \"directory\":\n                        dvc_info = None\n                        break\n                except FileNotFoundError:\n                    continue\n\n        if not dvc_info and not fs_info:\n            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)\n\n        info = _merge_info(repo, subkey, fs_info, dvc_info)\n        info[\"name\"] = path\n        return info\n\n    def get(\n        self,\n        rpath,\n        lpath,\n        recursive=False,\n        callback=DEFAULT_CALLBACK,\n        
maxdepth=None,\n        batch_size=None,\n        **kwargs,\n    ):\n        self._get(\n            rpath,\n            lpath,\n            recursive=recursive,\n            callback=callback,\n            maxdepth=maxdepth,\n            batch_size=batch_size,\n            **kwargs,\n        )\n\n    def _get(  # noqa: C901, PLR0912, PLR0915\n        self,\n        rpath,\n        lpath,\n        recursive=False,\n        callback=DEFAULT_CALLBACK,\n        maxdepth=None,\n        batch_size=None,\n        **kwargs,\n    ) -> list[tuple[str, str, Optional[dict]]]:\n        if (\n            isinstance(rpath, list)\n            or isinstance(lpath, list)\n            or has_magic(rpath)\n            or not self.exists(rpath)\n            or not recursive\n        ):\n            super().get(\n                rpath,\n                lpath,\n                recursive=recursive,\n                callback=callback,\n                maxdepth=maxdepth,\n                **kwargs,\n            )\n            return []\n\n        if os.path.isdir(lpath) or lpath.endswith(os.path.sep):\n            lpath = self.join(lpath, os.path.basename(rpath))\n\n        if self.isfile(rpath):\n            with callback.branched(rpath, lpath) as child:\n                self.get_file(rpath, lpath, callback=child, **kwargs)\n                return [(rpath, lpath, None)]\n\n        result: list[tuple[str, str, Optional[dict]]] = []\n        _dirs: list[str] = []\n        _files: dict[FileSystem, list[tuple[str, str, Optional[dict]]]]\n        _files = defaultdict(list)\n\n        for root, dirs, files in self.walk(rpath, maxdepth=maxdepth, detail=True):\n            if files:\n                callback.set_size((callback.size or 0) + len(files))\n\n            parts = self.relparts(root, rpath)\n            if parts in ((os.curdir,), (\"\",)):\n                parts = ()\n            dest_root = os.path.join(lpath, *parts)\n            if not maxdepth or len(parts) < maxdepth - 1:\n          
      _dirs.extend(f\"{dest_root}{os.path.sep}{d}\" for d in dirs)\n\n            key = self._get_key_from_relative(root)\n            _, dvc_fs, _ = self._get_subrepo_info(key)\n\n            for name, info in files.items():\n                dvc_info = info.get(\"dvc_info\")\n                fs_info = info.get(\"fs_info\")\n                if dvc_fs and dvc_info and not fs_info:\n                    fs = dvc_fs\n                    fs_path = dvc_info[\"name\"]\n                else:\n                    fs = self.repo.fs\n                    fs_path = fs_info[\"name\"]\n\n                src_path = f\"{root}{self.sep}{name}\"\n                dest_path = f\"{dest_root}{os.path.sep}{name}\"\n                _files[fs].append((fs_path, dest_path, dvc_info))\n                result.append((src_path, dest_path, info))\n\n        os.makedirs(lpath, exist_ok=True)\n        for d in _dirs:\n            os.makedirs(d, exist_ok=True)\n\n        def get_file(arg: tuple[FileSystem, tuple[str, str, Optional[dict]]]):\n            fs, (src, dest, info) = arg\n            kw = kwargs\n            if isinstance(fs, DataFileSystem):\n                kw = kw | {\"info\": info}\n            with callback.branched(src, dest) as child:\n                fs.get_file(src, dest, callback=child, **kw)\n\n        if batch_size == 1:\n            ctx: AbstractContextManager = nullcontext()\n            map_fn: Callable = map\n        else:\n            ctx = ThreadPoolExecutor(max_workers=batch_size)\n            map_fn = ctx.imap_unordered\n\n        with ctx:\n            it = ((fs, f) for fs, files in _files.items() for f in files)\n            deque(callback.wrap(map_fn(get_file, it)), maxlen=0)\n        return result\n\n    def get_file(self, rpath, lpath, **kwargs):\n        dvc_info = kwargs.pop(\"info\", {}).pop(\"dvc_info\", None)\n        key = self._get_key_from_relative(rpath)\n        fs_path = self._from_key(key)\n        dirpath = os.path.dirname(lpath)\n        if dirpath:\n 
           # makedirs raises error if the string is empty\n            os.makedirs(dirpath, exist_ok=True)\n\n        try:\n            return self.repo.fs.get_file(fs_path, lpath, **kwargs)\n        except FileNotFoundError:\n            _, dvc_fs, subkey = self._get_subrepo_info(key)\n            if not dvc_fs:\n                raise\n\n        dvc_path = _get_dvc_path(dvc_fs, subkey)\n        return dvc_fs.get_file(dvc_path, lpath, info=dvc_info, **kwargs)\n\n    def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):\n        if maxdepth is not None:\n            raise NotImplementedError\n\n        sizes = {}\n        dus = {}\n        todo = deque([self.info(path)])\n        while todo:\n            info = todo.popleft()\n            isdir = info[\"type\"] == \"directory\"\n            size = info[\"size\"] or 0\n            name = info[\"name\"]\n\n            if not isdir:\n                sizes[name] = size\n                continue\n\n            dvc_info = info.get(\"dvc_info\") or {}\n            fs_info = info.get(\"fs_info\")\n            entry = dvc_info.get(\"entry\")\n            if (\n                dvc_info\n                and not fs_info\n                and entry is not None\n                and entry.size is not None\n            ):\n                dus[name] = entry.size\n                continue\n\n            if withdirs:\n                sizes[name] = size\n\n            todo.extend(self.ls(info[\"name\"], detail=True))\n\n        if total:\n            return sum(sizes.values()) + sum(dus.values())\n\n        return sizes\n\n    def close(self):\n        self._repo_stack.close()\n\n\nclass DVCFileSystem(FileSystem):\n    protocol = \"local\"\n    PARAM_CHECKSUM = \"md5\"\n\n    def _prepare_credentials(self, **config) -> dict[str, Any]:\n        return config\n\n    @functools.cached_property\n    def fs(self) -> \"_DVCFileSystem\":\n        return _DVCFileSystem(**self.fs_args)\n\n    @property\n    def 
immutable(self):\n        from dvc.scm import NoSCM\n\n        if isinstance(self.fs.repo.scm, NoSCM):\n            return False\n\n        return self.fs._repo_kwargs.get(\"rev\") == self.fs.repo.get_rev()\n\n    def getcwd(self):\n        return self.fs.getcwd()\n\n    def _get(\n        self,\n        from_info: Union[AnyFSPath, list[AnyFSPath]],\n        to_info: Union[AnyFSPath, list[AnyFSPath]],\n        callback: \"Callback\" = DEFAULT_CALLBACK,\n        recursive: bool = False,\n        batch_size: Optional[int] = None,\n        **kwargs,\n    ) -> list[tuple[str, str, Optional[dict]]]:\n        # FileSystem.get is non-recursive by default if arguments are lists\n        # otherwise, it's recursive.\n        recursive = not (isinstance(from_info, list) and isinstance(to_info, list))\n        return self.fs._get(\n            from_info,\n            to_info,\n            callback=callback,\n            recursive=recursive,\n            batch_size=batch_size,\n            **kwargs,\n        )\n\n    def get(\n        self,\n        from_info: Union[AnyFSPath, list[AnyFSPath]],\n        to_info: Union[AnyFSPath, list[AnyFSPath]],\n        callback: \"Callback\" = DEFAULT_CALLBACK,\n        recursive: bool = False,\n        batch_size: Optional[int] = None,\n        **kwargs,\n    ) -> None:\n        self._get(\n            from_info,\n            to_info,\n            callback=callback,\n            batch_size=batch_size,\n            recursive=recursive,\n            **kwargs,\n        )\n\n    @property\n    def fsid(self) -> str:\n        return self.fs.fsid\n\n    def isdvc(self, path, **kwargs) -> bool:\n        return self.fs.isdvc(path, **kwargs)\n\n    @property\n    def repo(self) -> \"Repo\":\n        return self.fs.repo\n\n    @property\n    def repo_url(self) -> str:\n        return self.fs.repo_url\n\n    def from_os_path(self, path: str) -> str:\n        if os.path.isabs(path) or (\n            os.name == \"nt\" and posixpath.isabs(path) and 
ntpath.sep not in path\n        ):\n            path = os.path.relpath(path, self.repo.root_dir)\n        return as_posix(path)\n\n    def close(self):\n        if \"fs\" in self.__dict__:\n            self.fs.close()\n"
  },
  {
    "path": "dvc/fs/dvc_path.py",
    "content": "\"\"\"UPath implementation for DVCFileSystem.\n\nThis provides a `pathlib.Path` like interface to\nwork with DVCFileSystem.\n\nExamples\n--------\n\n>>> from upath import UPath\n\n>>> local = UPath(\"dvc://path/to/local/repo\")\n>>> https = UPath(\"dvc+https://github.com/iterative/example-get-started\", rev=\"main\")\n>>> ssh = UPath(\"dvc+ssh://git@github.com:iterative/example-get-started.git\")\n\"\"\"\n\nfrom urllib.parse import urlsplit\n\nfrom upath import UPath  # ty: ignore[unresolved-import]\n\n\nclass DVCPath(UPath):\n    @classmethod\n    def _transform_init_args(cls, args, protocol, storage_options):\n        if not args:\n            args = (\"/\",)\n        elif (\n            args\n            and \"url\" not in storage_options\n            and protocol in {\"dvc+http\", \"dvc+https\", \"dvc+ssh\"}\n        ):\n            url, *rest = args\n            url = urlsplit(str(url))\n            proto = protocol.split(\"+\")[1]\n            if proto == \"ssh\":\n                base_url = url.netloc + url.path\n            else:\n                base_url = url._replace(scheme=proto).geturl()\n            storage_options[\"url\"] = base_url\n            # Assume the given path is a root url\n            args = (\"/\", *rest)\n        return super()._transform_init_args(args, \"dvc\", storage_options)\n\n    def __str__(self):\n        s = super().__str__()\n        if url := self.storage_options.get(\"url\"):\n            return s.replace(\"dvc://\", f\"dvc+{url}\", 1)\n        return s\n\n    def with_segments(self, *pathsegments):\n        obj = super().with_segments(*pathsegments)\n        # cache filesystem, as dvcfs does not cache filesystem\n        # caveat: any joinpath operation will instantiate filesystem\n        obj._fs_cached = self.fs\n        return obj\n"
  },
  {
    "path": "dvc/fs/git.py",
    "content": "import functools\nfrom typing import TYPE_CHECKING, Any, Optional\n\nfrom . import FileSystem\n\nif TYPE_CHECKING:\n    from scmrepo.fs import GitFileSystem as FsspecGitFileSystem\n    from scmrepo.git.objects import GitTrie\n\n    from dvc.scm import Git\n\n\nclass GitFileSystem(FileSystem):\n    \"\"\"Proxies the repo file access methods to Git objects\"\"\"\n\n    protocol = \"local\"\n    PARAM_CHECKSUM = \"md5\"\n\n    def __init__(\n        self,\n        path: Optional[str] = None,\n        rev: Optional[str] = None,\n        scm: Optional[\"Git\"] = None,\n        trie: Optional[\"GitTrie\"] = None,\n        **kwargs: Any,\n    ) -> None:\n        from dvc.scm import resolve_rev\n\n        super().__init__()\n        self.fs_args.update(\n            {\n                \"path\": path,\n                \"rev\": rev,\n                \"scm\": scm,\n                \"trie\": trie,\n                \"rev_resolver\": resolve_rev,\n                **kwargs,\n            }\n        )\n\n    @functools.cached_property\n    def fs(self) -> \"FsspecGitFileSystem\":\n        from scmrepo.fs import GitFileSystem as FsspecGitFileSystem\n\n        return FsspecGitFileSystem(**self.fs_args)\n\n    def getcwd(self):\n        return self.fs.getcwd()\n\n    def chdir(self, path):\n        self.fs.chdir(path)\n\n    @property\n    def rev(self) -> str:\n        return self.fs.rev\n\n    def ls(self, path, detail=True, **kwargs):\n        return self.fs.ls(path, detail=detail, **kwargs) or []\n"
  },
  {
    "path": "dvc/ignore.py",
    "content": "import functools\nimport os\nimport re\nfrom collections.abc import Iterable, Iterator\nfrom itertools import chain, groupby, takewhile\nfrom typing import TYPE_CHECKING, Any, Literal, NamedTuple, Optional, Union, overload\n\ntry:\n    from pathspec.patterns.gitignore.spec import (  # type: ignore[import-not-found]\n        GitIgnoreSpecPattern,\n    )\nexcept ImportError:  # pathspec<1\n    from pathspec.patterns import (\n        GitWildMatchPattern as GitIgnoreSpecPattern,\n    )\n\nfrom pathspec.util import normalize_file\nfrom pygtrie import Trie\n\nfrom dvc.fs import Schemes, localfs\nfrom dvc.log import logger\nfrom dvc.pathspec_math import PatternInfo, merge_patterns\n\nif TYPE_CHECKING:\n    from typing_extensions import Self\n\n    from dvc.fs import FileSystem\n\nlogger = logger.getChild(__name__)\n\n\nclass DvcIgnore:\n    DVCIGNORE_FILE = \".dvcignore\"\n\n    def __call__(\n        self, root: str, dirs: list[str], files: list[str]\n    ) -> tuple[list[str], list[str]]:\n        raise NotImplementedError\n\n\nclass DvcIgnorePatterns(DvcIgnore):\n    def __init__(\n        self, pattern_list: Iterable[Union[PatternInfo, str]], dirname: str, sep: str\n    ) -> None:\n        try:\n            from pathspec.patterns.gitignore.spec import (  # type: ignore[import-not-found]\n                _DIR_MARK,\n            )\n        except ImportError:  # pathspec<1\n            from pathspec.patterns.gitwildmatch import (  # type: ignore[attr-defined, no-redef]\n                _DIR_MARK,\n            )\n\n        pattern_infos = [\n            pattern if isinstance(pattern, PatternInfo) else PatternInfo(pattern, \"\")\n            for pattern in pattern_list\n        ]\n\n        self.sep = sep\n        self.pattern_list: list[PatternInfo] = []\n        self.dirname = dirname\n        self.find_matching_pattern = functools.cache(self._find_matching_pattern)\n\n        regex_pattern_list: list[tuple[str, bool, bool, PatternInfo]] = []\n        
for count, pattern_info in enumerate(pattern_infos):\n            regex, ignore = GitIgnoreSpecPattern.pattern_to_regex(pattern_info.patterns)\n            if regex is not None and ignore is not None:\n                self.pattern_list.append(pattern_info)\n                regex = regex.replace(f\"<{_DIR_MARK}>\", f\"<{_DIR_MARK}{count}>\")\n                regex_pattern_list.append(\n                    (regex, ignore, pattern_info.patterns.endswith(\"/\"), pattern_info)\n                )\n\n        def keyfunc(item: tuple[str, bool, bool, PatternInfo]) -> tuple[bool, bool]:\n            _, ignore, dir_only_pattern, _ = item\n            return ignore, dir_only_pattern\n\n        self.ignore_spec: list[\n            tuple[\n                re.Pattern[str],\n                bool,\n                bool,\n                dict[Optional[str], tuple[str, PatternInfo]],\n            ]\n        ]\n        self.ignore_spec = []\n        for (ignore, dir_only_pattern), group in groupby(\n            regex_pattern_list, key=keyfunc\n        ):\n            if ignore:\n                # For performance, we combine all exclude patterns.\n                # But we still need to figure out which pattern matched which rule,\n                # (eg: to show in `dvc check-ignore`).\n                # So, we use named groups and keep a map of group name to pattern.\n                pattern_map: dict[Optional[str], tuple[str, PatternInfo]] = {\n                    f\"rule_{i}\": (regex, pi)\n                    for i, (regex, _, _, pi) in enumerate(group)\n                }\n                combined_regex = \"|\".join(\n                    f\"(?P<{name}>{regex})\" for name, (regex, _) in pattern_map.items()\n                )\n                self.ignore_spec.append(\n                    (re.compile(combined_regex), ignore, dir_only_pattern, pattern_map)\n                )\n            else:\n                # unignored patterns are not combined with `|`.\n                for regex, 
_, _, pi in group:\n                    pattern_map = {None: (regex, pi)}\n                    self.ignore_spec.append(\n                        (re.compile(regex), ignore, dir_only_pattern, pattern_map)\n                    )\n\n    @classmethod\n    def from_file(cls, path: str, fs: \"FileSystem\", name: str) -> \"Self\":\n        assert fs.isabs(path)\n        dirname = fs.normpath(fs.dirname(path))\n        with fs.open(path, encoding=\"utf-8\") as fobj:\n            path_spec_lines = [\n                PatternInfo(line, f\"{name}:{line_no + 1}:{line}\")\n                for line_no, line in enumerate(map(str.strip, fobj.readlines()))\n                if line and not (line.strip().startswith(\"#\"))\n            ]\n\n        return cls(path_spec_lines, dirname, fs.sep)\n\n    def __call__(\n        self, root: str, dirs: list[str], files: list[str]\n    ) -> tuple[list[str], list[str]]:\n        files = [f for f in files if not self.matches(root, f)]\n        dirs = [d for d in dirs if not self.matches(root, d, True)]\n\n        return dirs, files\n\n    def _get_normalize_path(self, dirname: str, basename: str) -> Optional[str]:\n        # NOTE: `relpath` is too slow, so we have to assume that both\n        # `dirname` and `self.dirname` are relative or absolute together.\n\n        prefix = self.dirname.rstrip(self.sep) + self.sep\n\n        if dirname == self.dirname:\n            path = basename\n        elif dirname.startswith(prefix):\n            rel = dirname[len(prefix) :]\n            # NOTE: `os.path.join` is ~x5.5 slower\n            path = f\"{rel}{self.sep}{basename}\"\n        else:\n            return None\n\n        if os.name == \"nt\":\n            return normalize_file(path)\n        return path\n\n    @overload\n    def matches(\n        self,\n        dirname: str,\n        basename: str,\n        is_dir: bool = False,\n        details: Literal[False] = ...,\n    ) -> bool: ...\n\n    @overload\n    def matches(\n        self,\n        
dirname: str,\n        basename: str,\n        is_dir: bool = False,\n        details: Literal[True] = ...,\n    ) -> tuple[bool, list[PatternInfo]]: ...\n\n    @overload\n    def matches(\n        self,\n        dirname: str,\n        basename: str,\n        is_dir: bool = False,\n        details: bool = False,\n    ) -> Union[bool, tuple[bool, list[PatternInfo]]]: ...\n\n    def matches(\n        self,\n        dirname: str,\n        basename: str,\n        is_dir: bool = False,\n        details: bool = False,\n    ) -> Union[bool, tuple[bool, list[PatternInfo]]]:\n        path = self._get_normalize_path(dirname, basename)\n        result = False\n        _match: list[PatternInfo] = []\n        if path:\n            result, _match = self._ignore(path, is_dir)\n        return (result, _match) if details else result\n\n    def _find_matching_pattern(\n        self, path: str, is_dir: bool\n    ) -> tuple[bool, list[PatternInfo]]:\n        paths = [path]\n        if is_dir and not path.endswith(\"/\"):\n            paths.append(f\"{path}/\")\n\n        for pattern, ignore, dir_only_pattern, pattern_map in reversed(\n            self.ignore_spec\n        ):\n            if dir_only_pattern and not is_dir:\n                continue\n            for p in paths:\n                match = pattern.match(p)\n                if not match:\n                    continue\n                if ignore:\n                    group_name, _match = next(\n                        (\n                            (name, _match)\n                            for name, _match in match.groupdict().items()\n                            if name.startswith(\"rule_\") and _match is not None\n                        )\n                    )\n                else:\n                    # unignored patterns are not combined with `|`,\n                    # so there are no groups.\n                    group_name = None\n                _regex, pattern_info = pattern_map[group_name]\n                
return ignore, [pattern_info]\n        return False, []\n\n    def _ignore(self, path: str, is_dir: bool) -> tuple[bool, list[PatternInfo]]:\n        parts = path.split(\"/\")\n        result = False\n        matches: list[PatternInfo] = []\n        for i in range(1, len(parts) + 1):\n            rel_path = \"/\".join(parts[:i])\n            result, _matches = self.find_matching_pattern(\n                rel_path, is_dir or i < len(parts)\n            )\n            if i < len(parts) and not result:\n                continue\n            matches.extend(_matches)\n            if result:\n                break\n        return result, matches\n\n    def __hash__(self) -> int:\n        return hash(self.dirname + \":\" + str(self.pattern_list))\n\n    def __eq__(self, other: object) -> bool:\n        if not isinstance(other, DvcIgnorePatterns):\n            return NotImplemented\n        return (self.dirname == other.dirname) & (\n            [pattern.patterns for pattern in self.pattern_list]\n            == [pattern.patterns for pattern in other.pattern_list]\n        )\n\n    def __bool__(self) -> bool:\n        return bool(self.pattern_list)\n\n\nclass CheckIgnoreResult(NamedTuple):\n    file: str\n    match: bool\n    pattern_infos: list[PatternInfo]\n\n\nclass DvcIgnoreFilter:\n    def __init__(self, fs: \"FileSystem\", root_dir: str) -> None:\n        from dvc.repo import Repo\n\n        default_ignore_patterns = [\n            \".hg/\",\n            \".git/\",\n            \".git\",\n            f\"{Repo.DVC_DIR}/\",\n        ]\n\n        self.fs = fs\n        self.root_dir = root_dir\n        self.ignores_trie_fs = Trie()\n        self._ignores_trie_subrepos = Trie()\n\n        key = self._get_key(root_dir)\n        self.ignores_trie_fs[key] = DvcIgnorePatterns(\n            default_ignore_patterns,\n            root_dir,\n            fs.sep,\n        )\n        self._ignores_trie_subrepos[key] = self.ignores_trie_fs[key]\n        self._update(\n            
self.root_dir,\n            self._ignores_trie_subrepos,\n            dnames=None,\n            ignore_subrepos=False,\n        )\n        self._update(\n            self.root_dir,\n            self.ignores_trie_fs,\n            dnames=None,\n            ignore_subrepos=True,\n        )\n\n    def _get_key(self, path: str) -> tuple[str, ...]:\n        parts = self.fs.relparts(path, self.root_dir)\n        if parts == (os.curdir,):\n            return ()\n        return parts\n\n    def _update_trie(self, dirname: str, trie: Trie) -> None:\n        key = self._get_key(dirname)\n        old_pattern = trie.longest_prefix(key).value\n        matches = old_pattern.matches(dirname, DvcIgnore.DVCIGNORE_FILE, False)\n\n        path = self.fs.join(dirname, DvcIgnore.DVCIGNORE_FILE)\n        if not matches and self.fs.exists(path):\n            name = self.fs.relpath(path, self.root_dir)\n            new_pattern = DvcIgnorePatterns.from_file(path, self.fs, name)\n            if old_pattern:\n                plist, prefix = merge_patterns(\n                    self.fs.flavour,\n                    old_pattern.pattern_list,\n                    old_pattern.dirname,\n                    new_pattern.pattern_list,\n                    new_pattern.dirname,\n                )\n                trie[key] = DvcIgnorePatterns(plist, prefix, self.fs.sep)\n            else:\n                trie[key] = new_pattern\n        elif old_pattern:\n            trie[key] = old_pattern\n\n    def _update(\n        self,\n        dirname: str,\n        ignore_trie: Trie,\n        dnames: Optional[\"list\"],\n        ignore_subrepos: bool,\n    ) -> None:\n        self._update_trie(dirname, ignore_trie)\n\n        if ignore_subrepos:\n            if dnames is None:\n                try:\n                    _, dnames, _ = next(self.fs.walk(dirname))\n                except StopIteration:\n                    dnames = []\n\n            for dname in dnames:\n                
self._update_sub_repo(self.fs.join(dirname, dname), ignore_trie)\n\n    def _update_sub_repo(self, path: str, ignore_trie: Trie) -> None:\n        from dvc.repo import Repo\n\n        if path == self.root_dir:\n            return\n\n        dvc_dir = self.fs.join(path, Repo.DVC_DIR)\n        if not self.fs.exists(dvc_dir):\n            return\n\n        root, dname = self.fs.split(path)\n        key = self._get_key(root)\n        pattern_info = PatternInfo(f\"/{dname}/\", f\"in sub_repo:{dname}\")\n        new_pattern = DvcIgnorePatterns([pattern_info], root, self.fs.sep)\n        old_pattern = ignore_trie.longest_prefix(key).value\n        if old_pattern:\n            plist, prefix = merge_patterns(\n                self.fs.flavour,\n                old_pattern.pattern_list,\n                old_pattern.dirname,\n                new_pattern.pattern_list,\n                new_pattern.dirname,\n            )\n            ignore_trie[key] = DvcIgnorePatterns(plist, prefix, self.fs.sep)\n        else:\n            ignore_trie[key] = new_pattern\n\n    def __call__(\n        self, root: str, dirs: list[str], files: list[str], ignore_subrepos: bool = True\n    ) -> tuple[list[str], list[str]]:\n        abs_root = self.fs.abspath(root)\n        ignore_pattern = self._get_trie_pattern(\n            abs_root, dnames=dirs, ignore_subrepos=ignore_subrepos\n        )\n        if ignore_pattern:\n            dirs, files = ignore_pattern(abs_root, dirs, files)\n        return dirs, files\n\n    @overload\n    def ls(\n        self, fs: \"FileSystem\", path: str, detail: Literal[True], **kwargs: Any\n    ) -> list[dict[str, Any]]: ...\n\n    @overload\n    def ls(\n        self, fs: \"FileSystem\", path: str, detail: Literal[False], **kwargs\n    ) -> list[str]: ...\n\n    @overload\n    def ls(\n        self, fs: \"FileSystem\", path: str, detail: bool = True, **kwargs\n    ) -> Union[list[str], list[dict[str, Any]]]: ...\n\n    def ls(\n        self, fs: \"FileSystem\", path: 
str, detail: bool = True, **kwargs: Any\n    ) -> Union[list[str], list[dict[str, Any]]]:\n        fs_dict = {}\n        dirs = []\n        nondirs = []\n\n        for entry in fs.ls(path, detail=True, **kwargs):\n            name = fs.name(entry[\"name\"])\n            fs_dict[name] = entry\n            if entry[\"type\"] == \"directory\":\n                dirs.append(name)\n            else:\n                nondirs.append(name)\n\n        dirs, nondirs = self(path, dirs, nondirs, **kwargs)\n\n        if not detail:\n            return dirs + nondirs\n\n        return [fs_dict[name] for name in chain(dirs, nondirs)]\n\n    def walk(\n        self, fs: \"FileSystem\", path: str, **kwargs: Any\n    ) -> Iterator[\n        Union[\n            tuple[str, list[str], list[str]],\n            tuple[str, dict[str, dict], dict[str, dict]],\n        ]\n    ]:\n        detail = kwargs.get(\"detail\", False)\n        ignore_subrepos = kwargs.pop(\"ignore_subrepos\", True)\n        if fs.protocol == Schemes.LOCAL:\n            for root, dirs, files in fs.walk(path, **kwargs):\n                if detail:\n                    assert isinstance(dirs, dict)\n                    assert isinstance(files, dict)\n                    dnames, fnames = self(\n                        root,\n                        list(dirs),\n                        list(files),\n                        ignore_subrepos=ignore_subrepos,\n                    )\n                    list(map(dirs.pop, dirs.keys() - set(dnames)))\n                    list(map(files.pop, files.keys() - set(fnames)))\n                else:\n                    dirs[:], files[:] = self(\n                        root, dirs, files, ignore_subrepos=ignore_subrepos\n                    )\n                yield root, dirs, files\n        else:\n            yield from fs.walk(path, **kwargs)\n\n    def find(self, fs: \"FileSystem\", path: str, **kwargs: Any) -> Iterator[str]:\n        if fs.protocol == Schemes.LOCAL:\n            for 
root, _, files in self.walk(fs, path, **kwargs):\n                for file in files:\n                    # NOTE: os.path.join is ~5.5 times slower\n                    yield f\"{root}{fs.sep}{file}\"\n        else:\n            yield from fs.find(path)\n\n    def _get_trie_pattern(\n        self, dirname: str, dnames: Optional[list[str]] = None, ignore_subrepos=True\n    ) -> Optional[\"DvcIgnorePatterns\"]:\n        if ignore_subrepos:\n            ignores_trie = self.ignores_trie_fs\n        else:\n            ignores_trie = self._ignores_trie_subrepos\n\n        if not self.fs.isin_or_eq(dirname, self.root_dir):\n            # outside of the repo\n            return None\n\n        key = self._get_key(dirname)\n\n        ignore_pattern = ignores_trie.get(key)\n        if ignore_pattern:\n            return ignore_pattern\n\n        prefix_key = ignores_trie.longest_prefix(key).key or ()\n        prefix = self.fs.join(self.root_dir, *prefix_key)\n\n        dirs = list(\n            takewhile(\n                lambda path: path != prefix,\n                (parent for parent in localfs.parents(dirname)),\n            )\n        )\n        dirs.reverse()\n        dirs.append(dirname)\n\n        for parent in dirs:\n            self._update(parent, ignores_trie, dnames, ignore_subrepos)\n\n        return ignores_trie.get(key)\n\n    def _is_ignored(\n        self, path: str, is_dir: bool = False, ignore_subrepos: bool = True\n    ) -> bool:\n        if self._outside_repo(path):\n            return False\n        dirname, basename = self.fs.split(self.fs.normpath(path))\n        ignore_pattern = self._get_trie_pattern(dirname, None, ignore_subrepos)\n        if ignore_pattern:\n            return ignore_pattern.matches(dirname, basename, is_dir)\n        return False\n\n    def is_ignored_dir(self, path: str, ignore_subrepos: bool = True) -> bool:\n        # only used in LocalFileSystem\n        path = self.fs.abspath(path)\n        if path == self.root_dir:\n        
    return False\n\n        return self._is_ignored(path, True, ignore_subrepos=ignore_subrepos)\n\n    def is_ignored_file(self, path: str, ignore_subrepos: bool = True) -> bool:\n        # only used in LocalFileSystem\n        path = self.fs.abspath(path)\n        return self._is_ignored(path, False, ignore_subrepos=ignore_subrepos)\n\n    def _outside_repo(self, path: str) -> bool:\n        return not self.fs.isin_or_eq(path, self.root_dir)\n\n    def check_ignore(self, target: str) -> CheckIgnoreResult:\n        # NOTE: can only be used in `dvc check-ignore`, see\n        # https://github.com/treeverse/dvc/issues/5046\n        full_target = self.fs.abspath(target)\n        matched_patterns: list[PatternInfo] = []\n        ignore = False\n        if not self._outside_repo(full_target):\n            dirname, basename = self.fs.split(self.fs.normpath(full_target))\n            pattern = self._get_trie_pattern(dirname)\n            if pattern:\n                ignore, matched_patterns = pattern.matches(\n                    dirname, basename, self.fs.isdir(full_target), details=True\n                )\n        return CheckIgnoreResult(target, ignore, matched_patterns)\n\n    def is_ignored(\n        self, fs: \"FileSystem\", path: str, ignore_subrepos: bool = True\n    ) -> bool:\n        # NOTE: can't use self.check_ignore(path).match for now, see\n        # https://github.com/treeverse/dvc/issues/4555\n        if fs.protocol != Schemes.LOCAL:\n            return False\n        if fs.isfile(path):\n            return self.is_ignored_file(path, ignore_subrepos)\n        if fs.isdir(path):\n            return self.is_ignored_dir(path, ignore_subrepos)\n        return self.is_ignored_file(path, ignore_subrepos) or self.is_ignored_dir(\n            path, ignore_subrepos\n        )\n\n\ndef init(path: Union[str, os.PathLike[str]]) -> str:\n    dvcignore = os.path.join(path, DvcIgnore.DVCIGNORE_FILE)\n    if os.path.exists(dvcignore):\n        return dvcignore\n\n    
with open(dvcignore, \"w\", encoding=\"utf-8\") as fobj:\n        fobj.write(\n            \"# Add patterns of files dvc should ignore, which could improve\\n\"\n            \"# the performance. Learn more at\\n\"\n            \"# https://dvc.org/doc/user-guide/dvcignore\\n\"\n        )\n\n    return dvcignore\n\n\ndef destroy(path: Union[str, os.PathLike[str]]) -> None:\n    from dvc.utils.fs import remove\n\n    dvcignore = os.path.join(path, DvcIgnore.DVCIGNORE_FILE)\n    remove(dvcignore)\n"
  },
  {
    "path": "dvc/info.py",
    "content": "import importlib.metadata as importlib_metadata\nimport itertools\nimport os\nimport pathlib\nimport platform\n\nimport psutil\n\nfrom dvc import PKG, __version__\nfrom dvc.exceptions import NotDvcRepoError\nfrom dvc.fs import Schemes, generic, get_fs_cls, get_fs_config, registry\nfrom dvc.repo import Repo\nfrom dvc.scm import SCMError\nfrom dvc.utils import error_link\n\nSUBPROJECTS = (\n    \"dvc_data\",\n    \"dvc_objects\",\n    \"dvc_render\",\n    \"dvc_task\",\n    \"scmrepo\",\n)\npackage = \"\" if PKG is None else f\" ({PKG})\"\n\n\ndef get_dvc_info():\n    dvc_version = f\"DVC version: {__version__}{package}\"\n    info = [\n        dvc_version,\n        \"-\" * len(dvc_version),\n        f\"Platform: Python {platform.python_version()} on {platform.platform()}\",\n        f\"Subprojects:{_get_subprojects()}\",\n        f\"Supports:{_get_supported_remotes()}\",\n        f\"Config:{_get_config_dirs()}\",\n    ]\n\n    try:\n        with Repo() as repo:\n            # cache_dir might not exist yet (e.g. 
after `dvc init`), and we\n            # can't auto-create it, as it might cause issues if the user\n            # later decides to enable shared cache mode with\n            # `dvc config cache.shared group`.\n            if os.path.exists(repo.cache.local.path):\n                info.append(f\"Cache types: {_get_linktype_support_info(repo)}\")\n                fs_type = _get_fs_type(repo.cache.local.path)\n                info.append(f\"Cache directory: {fs_type}\")\n            else:\n                info.append(\"Cache types: \" + error_link(\"no-dvc-cache\"))\n\n            info.append(f\"Caches: {_get_caches(repo.cache)}\")\n\n            configured_remotes = _get_remotes(repo.config)\n            remotes = \", \".join(configured_remotes) if configured_remotes else None\n            info.append(f\"Remotes: {remotes}\")\n\n            root_directory = repo.root_dir\n            fs_root = _get_fs_type(os.path.abspath(root_directory))\n            info.append(f\"Workspace directory: {fs_root}\")\n            info.append(f\"Repo: {_get_dvc_repo_info(repo)}\")\n            info.append(f\"Repo.site_cache_dir: {repo.site_cache_dir}\")\n    except NotDvcRepoError:\n        pass\n    except SCMError:\n        info.append(\"Repo: dvc, git (broken)\")\n\n    return \"\\n\".join(info)\n\n\ndef _get_caches(cache):\n    caches = (\n        cache_type\n        for cache_type, cache_instance in cache.by_scheme()\n        if cache_instance and cache_type not in (\"repo\", \"legacy\")\n    )\n\n    # Caches will be always non-empty including the local cache\n    return \", \".join(caches)\n\n\ndef _get_remotes(config):\n    return [\n        get_fs_cls(get_fs_config(config, name=remote)).protocol\n        for remote in config[\"remote\"]\n    ]\n\n\ndef _get_linktype_support_info(repo):\n    odb = repo.cache.local\n\n    links = generic.test_links(\n        [\"reflink\", \"hardlink\", \"symlink\"],\n        odb.fs,\n        odb.path,\n        repo.fs,\n        repo.root_dir,\n 
   )\n\n    return \", \".join(links)\n\n\ndef _get_subprojects():\n    subprojects = []\n    for subproject in SUBPROJECTS:\n        try:\n            version = importlib_metadata.version(subproject)\n            subprojects.append(f\"{subproject} = {version}\")\n        except ImportError:\n            pass\n\n    return \"\\n\\t\" + \"\\n\\t\".join(subprojects)\n\n\ndef _get_supported_remotes():\n    supported_remotes = []\n    for scheme in registry:\n        if scheme in [Schemes.LOCAL, Schemes.MEMORY, \"dvc\", \"git\"]:\n            continue\n\n        try:\n            fs_cls = registry[scheme]\n        except ImportError:\n            continue\n\n        if not fs_cls.get_missing_deps():\n            dependencies = []\n            for requirement in fs_cls.REQUIRES:\n                dependencies.append(  # noqa: PERF401\n                    f\"{requirement} = {importlib_metadata.version(requirement)}\"\n                )\n\n            remote_info = scheme\n            if dependencies:\n                remote_info += \" (\" + \", \".join(dependencies) + \")\"\n            supported_remotes.append(remote_info)\n\n    assert len(supported_remotes) >= 1\n    return \"\\n\\t\" + \",\\n\\t\".join(supported_remotes)\n\n\ndef _get_config_dirs():\n    from dvc.config import Config\n\n    dirs = [\n        f\"Global: {Config.get_dir('global')}\",\n        f\"System: {Config.get_dir('system')}\",\n    ]\n\n    return \"\\n\\t\" + \"\\n\\t\".join(dirs)\n\n\ndef _get_fs_type(path):\n    partition = {}\n    for part in psutil.disk_partitions(all=True):\n        if part.fstype:\n            try:\n                mountpoint = pathlib.Path(part.mountpoint).resolve()\n                partition[mountpoint] = part.fstype + \" on \" + part.device\n            except PermissionError:\n                pass\n\n    # need to follow the symlink: https://github.com/treeverse/dvc/issues/5065\n    path = pathlib.Path(path).resolve()\n\n    for parent in itertools.chain([path], 
path.parents):\n        if parent in partition:\n            return partition[parent]\n    return (\"unknown\", \"none\")\n\n\ndef _get_dvc_repo_info(repo):\n    if repo.config.get(\"core\", {}).get(\"no_scm\", False):\n        return \"dvc (no_scm)\"\n\n    if repo.root_dir != repo.scm.root_dir:\n        return \"dvc (subdir), git\"\n\n    return \"dvc, git\"\n"
  },
  {
    "path": "dvc/lock.py",
    "content": "\"\"\"Manages dvc lock file.\"\"\"\n\nimport hashlib\nimport os\nimport time\nfrom abc import ABC, abstractmethod\nfrom datetime import timedelta\nfrom typing import Optional, Union\n\nimport flufl.lock\nimport zc.lockfile\n\nfrom dvc.exceptions import DvcException\nfrom dvc.progress import Tqdm\nfrom dvc.utils import format_link\n\nDEFAULT_TIMEOUT = 3\n\n\nFAILED_TO_LOCK_MESSAGE = (\n    \"Unable to acquire lock. Most likely another DVC process is running or \"\n    \"was terminated abruptly. Check the page {} for other possible reasons \"\n    \"and to learn how to resolve this.\"\n).format(format_link(\"https://dvc.org/doc/user-guide/troubleshooting#lock-issue\"))\n\n\nclass LockError(DvcException):\n    \"\"\"Thrown when unable to acquire the lock for DVC repo.\"\"\"\n\n\nclass LockBase(ABC):\n    @abstractmethod\n    def __init__(self, lockfile):\n        self._lockfile = lockfile\n\n    @property\n    def lockfile(self):\n        return self._lockfile\n\n    @abstractmethod\n    def lock(self):\n        pass\n\n    @abstractmethod\n    def unlock(self):\n        pass\n\n    @property\n    @abstractmethod\n    def is_locked(self):\n        pass\n\n    @abstractmethod\n    def __enter__(self):\n        pass\n\n    @abstractmethod\n    def __exit__(self, typ, value, tbck):\n        pass\n\n\nclass LockNoop(LockBase):\n    def __init__(self, *args, **kwargs):\n        self._lock = False\n\n    def lock(self):\n        self._lock = True\n\n    def unlock(self):\n        if not self.is_locked:\n            raise DvcException(\"Unlock called on an unlocked lock\")\n        self._lock = False\n\n    @property\n    def is_locked(self):\n        return self._lock\n\n    def __enter__(self):\n        self.lock()\n\n    def __exit__(self, typ, value, tbck):\n        self.unlock()\n\n\nclass Lock(LockBase):\n    \"\"\"Class for DVC repo lock.\n\n    Uses zc.lockfile as backend.\n    \"\"\"\n\n    def __init__(self, lockfile, friendly=False, wait=False, 
**kwargs):\n        super().__init__(lockfile)\n        self._friendly = friendly\n        self._wait = wait\n        self._lock = None\n        self._lock_failed = False\n\n    @property\n    def files(self):\n        return [self._lockfile]\n\n    def _do_lock(self):\n        try:\n            self._lock_failed = False\n            self._lock = zc.lockfile.LockFile(self._lockfile)\n        except zc.lockfile.LockError:\n            self._lock_failed = True\n            raise LockError(FAILED_TO_LOCK_MESSAGE)  # noqa: B904\n\n    def lock(self):\n        \"\"\"Acquire the lock, either waiting forever, or after default_retries.\"\"\"\n        default_retries = 6\n        delay = DEFAULT_TIMEOUT / default_retries\n        attempts = 0\n\n        max_retries = float(\"inf\") if self._wait else default_retries\n\n        with Tqdm(\n            bar_format=\"{desc}\",\n            disable=not self._friendly,\n            desc=\"Waiting to acquire lock. \"\n            \"If DVC froze, see `hardlink_lock` in {}.\".format(\n                format_link(\"https://man.dvc.org/config#core\")\n            ),\n        ) as pbar:\n            while True:\n                try:\n                    self._do_lock()\n                    return\n                except LockError:\n                    attempts += 1\n                    if attempts > max_retries:\n                        raise\n                    time.sleep(delay)\n                finally:\n                    pbar.update()\n\n    def unlock(self):\n        if self._lock_failed:\n            assert self._lock is None\n            return\n\n        if not self.is_locked:\n            raise DvcException(\"Unlock called on an unlocked lock\")\n        assert self._lock\n        self._lock.close()\n        self._lock = None\n\n    @property\n    def is_locked(self):\n        return bool(self._lock)\n\n    def __enter__(self):\n        self.lock()\n\n    def __exit__(self, typ, value, tbck):\n        
self.unlock()\n\n\nclass HardlinkLock(flufl.lock.Lock, LockBase):\n    \"\"\"Class for DVC repo lock.\n\n    Args:\n        lockfile (str): the lock filename\n            in.\n        tmp_dir (str): a directory to store claim files.\n    \"\"\"\n\n    def __init__(self, lockfile, tmp_dir=None, wait=False, **kwargs):\n        import socket\n\n        self._tmp_dir = tmp_dir\n        super().__init__(lockfile)\n\n        # NOTE: this is basically Lock.__init__ copy-paste, except that\n        # instead of using `socket.getfqdn()` we use `socket.gethostname()`\n        # to speed this up. We've seen [1] `getfqdn()` take ~5sec to return\n        # anything, which is way too slow. `gethostname()` is actually a\n        # fallback for `getfqdn()` when it is not able to resolve a\n        # canonical hostname through network. The claimfile that uses\n        # `self._hostname` is still usable, as it uses `pid` and random\n        # number to generate the resulting lock file name, which is unique\n        # enough for our application.\n        #\n        # [1] https://github.com/treeverse/dvc/issues/2582\n        self._hostname = socket.gethostname()\n\n        self._lifetime = timedelta(days=365)  # Lock for good by default\n        self._separator = flufl.lock.SEP\n        self._set_claimfile()\n        self._owned = True\n        self._wait = wait\n        self._retry_errnos = []\n        self._friendly = kwargs.get(\"friendly\", False)\n\n    def lock(self, timeout: Optional[Union[timedelta, int]] = None):\n        try:\n            if not self._wait:\n                timeout = timeout or timedelta(seconds=DEFAULT_TIMEOUT)\n\n            with Tqdm(\n                bar_format=\"{desc}\",\n                disable=not (self._wait and self._friendly),\n                desc=\"Waiting to acquire lock\",\n            ):\n                super().lock(timeout)\n        except flufl.lock.TimeOutError:\n            raise LockError(FAILED_TO_LOCK_MESSAGE)  # noqa: B904\n\n    def 
_set_claimfile(self):\n        super()._set_claimfile()\n\n        if self._tmp_dir is not None:\n            # Under Windows file path length is limited so we hash it\n            hasher = hashlib.md5(self._claimfile.encode(), usedforsecurity=False)\n            filename = hasher.hexdigest()\n            self._claimfile = os.path.join(self._tmp_dir, filename + \".lock\")\n\n\ndef make_lock(lockfile, tmp_dir=None, friendly=False, hardlink_lock=False, wait=False):\n    cls = HardlinkLock if hardlink_lock else Lock\n    return cls(lockfile, tmp_dir=tmp_dir, friendly=friendly, wait=wait)\n"
  },
  {
    "path": "dvc/log.py",
    "content": "# using a separate module instead of using `dvc.logger` to not create an import-cycle.\nimport logging\n\n\nclass LoggerWithTrace(logging.Logger):\n    # only for type checking\n    trace = logging.debug\n\n\nlogger: \"LoggerWithTrace\" = logging.getLogger()  # type: ignore[assignment]\n"
  },
  {
    "path": "dvc/logger.py",
    "content": "\"\"\"Manages logging configuration for DVC repo.\"\"\"\n\nimport logging\nimport os\nimport sys\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom typing import ClassVar\n\nimport colorama\n\nfrom dvc.progress import Tqdm\n\n\ndef add_logging_level(level_name, level_num, method_name=None):\n    \"\"\"\n    Adds a new logging level to the `logging` module and the\n    currently configured logging class.\n\n    Uses the existing numeric level_num if already defined.\n\n    Based on https://stackoverflow.com/questions/2183233\n    \"\"\"\n    if method_name is None:\n        method_name = level_name.lower()\n\n    # If the level name is already defined as a top-level `logging`\n    # constant, then adopt the existing numeric level.\n    if hasattr(logging, level_name):\n        existing_level_num = getattr(logging, level_name)\n        assert isinstance(existing_level_num, int)\n        level_num = existing_level_num\n\n    def log_for_level(self, message, *args, **kwargs):\n        if self.isEnabledFor(level_num):\n            self._log(level_num, message, args, **kwargs)\n\n    def log_to_root(message, *args, **kwargs):\n        logging.log(level_num, message, *args, **kwargs)  # noqa: LOG015\n\n    # getLevelName resolves the numeric log level if already defined,\n    # otherwise returns a string\n    if not isinstance(logging.getLevelName(level_name), int):\n        logging.addLevelName(level_num, level_name)\n\n    if not hasattr(logging, level_name):\n        setattr(logging, level_name, level_num)\n\n    if not hasattr(logging.getLoggerClass(), method_name):\n        setattr(logging.getLoggerClass(), method_name, log_for_level)\n\n    if not hasattr(logging, method_name):\n        setattr(logging, method_name, log_to_root)\n\n\nclass LoggingException(Exception):  # noqa: N818\n    def __init__(self, record):\n        msg = f\"failed to log {record!s}\"\n        super().__init__(msg)\n\n\ndef exclude_filter(level: 
int):\n    def filter_fn(record: \"logging.LogRecord\") -> bool:\n        return record.levelno < level\n\n    return filter_fn\n\n\nclass ColorFormatter(logging.Formatter):\n    \"\"\"Spit out colored text in supported terminals.\n\n    colorama__ makes ANSI escape character sequences work under Windows.\n    See the colorama documentation for details.\n\n    __ https://pypi.python.org/pypi/colorama\n\n    If record has an extra `tb_only` attribute, it will not show the\n    exception cause, just the message and the traceback.\n    \"\"\"\n\n    reset = colorama.Fore.RESET\n    color_codes: ClassVar[dict[str, str]] = {\n        \"TRACE\": colorama.Fore.GREEN,\n        \"DEBUG\": colorama.Fore.BLUE,\n        \"WARNING\": colorama.Fore.YELLOW,\n        \"ERROR\": colorama.Fore.RED,\n        \"CRITICAL\": colorama.Fore.RED,\n    }\n\n    def __init__(self, log_colors: bool = True, show_traceback: bool = False) -> None:\n        super().__init__()\n        self.log_colors = log_colors\n        self.show_traceback = show_traceback\n\n    def format(self, record) -> str:  # noqa: C901\n        record.message = record.getMessage()\n        msg = self.formatMessage(record)\n\n        if record.levelno == logging.INFO:\n            return msg\n\n        ei = record.exc_info\n        if ei:\n            cause = \"\"\n            if not getattr(record, \"tb_only\", False):\n                cause = \": \".join(_iter_causes(ei[1]))\n            sep = \" - \" if msg and cause else \"\"\n            msg = msg + sep + cause\n\n        asctime = \"\"\n        verbose = _is_verbose()\n        if verbose:\n            asctime = self.formatTime(record, self.datefmt)\n        if verbose or self.show_traceback:\n            if ei and not record.exc_text:\n                record.exc_text = self.formatException(ei)\n            if record.exc_text:\n                if msg[-1:] != \"\\n\":\n                    msg = msg + \"\\n\"\n                msg = msg + record.exc_text + \"\\n\"\n     
       if record.stack_info:\n                if msg[-1:] != \"\\n\":\n                    msg = msg + \"\\n\"\n                msg = msg + self.formatStack(record.stack_info) + \"\\n\"\n\n        level = record.levelname\n        if self.log_colors:\n            color = self.color_codes[level]\n            if asctime:\n                asctime = color + asctime + self.reset\n            level = color + level + self.reset\n        return asctime + (\" \" if asctime else \"\") + level + \": \" + msg\n\n\nclass LoggerHandler(logging.StreamHandler):\n    def handleError(self, record):  # noqa: N802\n        super().handleError(record)\n        raise LoggingException(record)\n\n    def emit_pretty_exception(self, exc, verbose: bool = False):\n        return exc.__pretty_exc__(verbose=verbose)\n\n    def emit(self, record):\n        \"\"\"Write to Tqdm's stream so as to not break progress-bars\"\"\"\n        try:\n            if record.exc_info:\n                _, exc, *_ = record.exc_info\n                if hasattr(exc, \"__pretty_exc__\"):\n                    try:\n                        self.emit_pretty_exception(exc, verbose=_is_verbose())\n                        if not _is_verbose():\n                            return\n                    except Exception:  # noqa: BLE001, S110\n                        pass\n\n            msg = self.format(record)\n            Tqdm.write(msg, file=self.stream, end=getattr(self, \"terminator\", \"\\n\"))\n            self.flush()\n        except (BrokenPipeError, RecursionError):\n            raise\n        except Exception:  # noqa: BLE001\n            self.handleError(record)\n\n\ndef _is_verbose():\n    return (\n        logging.NOTSET < logging.getLogger(\"dvc\").getEffectiveLevel() <= logging.DEBUG\n    )\n\n\ndef _iter_causes(exc):\n    while exc:\n        yield str(exc)\n        exc = exc.__cause__\n\n\n@contextmanager\ndef set_loggers_level(level: int = logging.INFO) -> Iterator[None]:\n    ret: dict[logging.Logger, 
int] = {}\n    for name in [\"dvc\", \"dvc_objects\", \"dvc_data\"]:\n        _logger = logging.getLogger(name)\n        ret[_logger] = _logger.getEffectiveLevel()\n        _logger.setLevel(level)\n\n    try:\n        yield\n    finally:\n        for _logger, old_lvl in ret.items():\n            _logger.setLevel(old_lvl)\n\n\ndef setup(level: int = logging.INFO, log_colors: bool = True) -> None:\n    colorama.init()\n\n    color_out = log_colors and bool(sys.stdout) and sys.stdout.isatty()\n    color_err = log_colors and bool(sys.stderr) and sys.stderr.isatty()\n\n    formatter = ColorFormatter(log_colors=color_out)\n\n    console_info = LoggerHandler(sys.stdout)\n    console_info.setLevel(logging.INFO)\n    console_info.setFormatter(formatter)\n    console_info.addFilter(exclude_filter(logging.WARNING))\n\n    console_debug = LoggerHandler(sys.stdout)\n    console_debug.setLevel(logging.DEBUG)\n    console_debug.setFormatter(formatter)\n    console_debug.addFilter(exclude_filter(logging.INFO))\n\n    add_logging_level(\"TRACE\", logging.DEBUG - 5)\n\n    console_trace = LoggerHandler(sys.stdout)\n    console_trace.setLevel(logging.TRACE)  # type: ignore[attr-defined]\n    console_trace.setFormatter(formatter)\n    console_trace.addFilter(exclude_filter(logging.DEBUG))\n\n    show_traceback = bool(os.environ.get(\"DVC_SHOW_TRACEBACK\"))\n    err_formatter = ColorFormatter(log_colors=color_err, show_traceback=show_traceback)\n    console_errors = LoggerHandler(sys.stderr)\n    console_errors.setLevel(logging.WARNING)\n    console_errors.setFormatter(err_formatter)\n\n    for name in [\"dvc\", \"dvc_objects\", \"dvc_data\"]:\n        logger = logging.getLogger(name)\n        logger.setLevel(level)\n        for handler in [console_info, console_debug, console_trace, console_errors]:\n            logger.addHandler(handler)\n\n    if level >= logging.DEBUG:\n        # Unclosed session errors for asyncio/aiohttp are only available\n        # on the tracing mode for 
extensive debug purposes. They are really\n        # noisy, and this is potentially somewhere in the client library\n        # not managing their own session. Even though it is the best practice\n        # for them to do so, we can be assured that these errors are raised when\n        # the object is getting deallocated, so no need to take any extensive\n        # action.\n        logging.getLogger(\"asyncio\").setLevel(logging.CRITICAL)\n        logging.getLogger(\"aiohttp\").setLevel(logging.CRITICAL)\n"
  },
  {
    "path": "dvc/output.py",
    "content": "import errno\nimport os\nimport posixpath\nfrom collections import defaultdict\nfrom contextlib import suppress\nfrom operator import itemgetter\nfrom typing import TYPE_CHECKING, Any, Optional, Union\nfrom urllib.parse import urlparse\n\nimport voluptuous as vol\nfrom funcy import collecting, first, project\n\nfrom dvc import prompt\nfrom dvc.exceptions import (\n    CacheLinkError,\n    CheckoutError,\n    CollectCacheError,\n    ConfirmRemoveError,\n    DvcException,\n    MergeError,\n)\nfrom dvc.log import logger\nfrom dvc.utils import format_link\nfrom dvc.utils.objects import cached_property\nfrom dvc_data.hashfile import check as ocheck\nfrom dvc_data.hashfile import load as oload\nfrom dvc_data.hashfile.build import build\nfrom dvc_data.hashfile.checkout import checkout\nfrom dvc_data.hashfile.db import HashFileDB, add_update_tree\nfrom dvc_data.hashfile.hash import DEFAULT_ALGORITHM\nfrom dvc_data.hashfile.hash_info import HashInfo\nfrom dvc_data.hashfile.istextfile import istextfile\nfrom dvc_data.hashfile.meta import Meta\nfrom dvc_data.hashfile.transfer import transfer as otransfer\nfrom dvc_data.hashfile.tree import Tree, du\nfrom dvc_objects.errors import ObjectFormatError\n\nfrom .annotations import ANNOTATION_FIELDS, ANNOTATION_SCHEMA, Annotation\nfrom .fs import LocalFileSystem, RemoteMissingDepsError, Schemes, get_cloud_fs\nfrom .fs.callbacks import DEFAULT_CALLBACK, Callback, TqdmCallback\nfrom .utils import relpath\nfrom .utils.fs import path_isin\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc_data.hashfile.obj import HashFile\n    from dvc_data.index import DataIndexKey\n\n    from .ignore import CheckIgnoreResult, DvcIgnoreFilter\n\nlogger = logger.getChild(__name__)\n\n\nCHECKSUM_SCHEMA = vol.Any(\n    None,\n    vol.And(str, vol.Length(max=0), vol.SetTo(None)),\n    vol.And(vol.Any(str, vol.And(int, vol.Coerce(str))), vol.Length(min=3), vol.Lower),\n)\n\nCASE_SENSITIVE_CHECKSUM_SCHEMA = vol.Any(\n    
None,\n    vol.And(str, vol.Length(max=0), vol.SetTo(None)),\n    vol.And(vol.Any(str, vol.And(int, vol.Coerce(str))), vol.Length(min=3)),\n)\n\n# NOTE: currently there are only 3 possible checksum names:\n#\n#    1) md5 (LOCAL, SSH) (actually DVC 2.x md5-dos2unix)\n#    2) etag (S3, GS, OSS, AZURE, HTTP);\n#    3) checksum (HDFS);\n#\n# so when a few types of outputs share the same name, we only need\n# specify it once.\nHDFS_PARAM_CHECKSUM = \"checksum\"\nS3_PARAM_CHECKSUM = \"etag\"\nCHECKSUMS_SCHEMA = {\n    \"md5\": CHECKSUM_SCHEMA,  # DVC 2.x md5-dos2unix\n    HDFS_PARAM_CHECKSUM: CHECKSUM_SCHEMA,\n    S3_PARAM_CHECKSUM: CASE_SENSITIVE_CHECKSUM_SCHEMA,\n}\n\n\ndef _get(stage, path, **kwargs):\n    return Output(stage, path, **kwargs)\n\n\ndef loadd_from(stage, d_list) -> list[\"Output\"]:\n    ret = []\n    for d in d_list:\n        p = d.pop(Output.PARAM_PATH)\n        cache = d.pop(Output.PARAM_CACHE, True)\n        metric = d.pop(Output.PARAM_METRIC, False)\n        plot = d.pop(Output.PARAM_PLOT, False)\n        persist = d.pop(Output.PARAM_PERSIST, False)\n        remote = d.pop(Output.PARAM_REMOTE, None)\n        annot = {field: d.pop(field, None) for field in ANNOTATION_FIELDS}\n        files = d.pop(Output.PARAM_FILES, None)\n        push = d.pop(Output.PARAM_PUSH, True)\n        hash_name = d.pop(Output.PARAM_HASH, None)\n        fs_config = d.pop(Output.PARAM_FS_CONFIG, None)\n        ret.append(\n            _get(\n                stage,\n                p,\n                info=d,\n                cache=cache,\n                metric=metric,\n                plot=plot,\n                persist=persist,\n                remote=remote,\n                **annot,\n                files=files,\n                push=push,\n                hash_name=hash_name,\n                fs_config=fs_config,\n            )\n        )\n    return ret\n\n\ndef loads_from(\n    stage,\n    s_list,\n    use_cache=True,\n    metric=False,\n    plot=False,\n    
persist=False,\n    remote=None,\n    push=True,\n):\n    return [\n        _get(\n            stage,\n            s,\n            info={},\n            cache=use_cache,\n            metric=metric,\n            plot=plot,\n            persist=persist,\n            remote=remote,\n            push=push,\n        )\n        for s in s_list\n    ]\n\n\ndef _split_dict(d, keys):\n    return project(d, keys), project(d, d.keys() - keys)\n\n\ndef _merge_data(s_list):\n    d: dict[str, dict] = defaultdict(dict)\n    for key in s_list:\n        if isinstance(key, str):\n            d[key].update({})\n            continue\n        if not isinstance(key, dict):\n            raise ValueError(f\"'{type(key).__name__}' not supported.\")  # noqa: TRY004\n\n        for k, flags in key.items():\n            if not isinstance(flags, dict):\n                raise ValueError(  # noqa: TRY004\n                    f\"Expected dict for '{k}', got: '{type(flags).__name__}'\"\n                )\n            d[k].update(flags)\n    return d\n\n\n@collecting\ndef load_from_pipeline(stage, data, typ=\"outs\"):\n    if typ not in (stage.PARAM_OUTS, stage.PARAM_METRICS, stage.PARAM_PLOTS):\n        raise ValueError(f\"'{typ}' key is not allowed for pipeline files.\")\n\n    metric = typ == stage.PARAM_METRICS\n    plot = typ == stage.PARAM_PLOTS\n\n    d = _merge_data(data)\n\n    for path, flags in d.items():\n        plt_d = {}\n        if plot:\n            from dvc.schema import PLOT_PROPS\n\n            plt_d, flags = _split_dict(flags, keys=PLOT_PROPS.keys())\n\n        extra = project(\n            flags,\n            [\n                Output.PARAM_CACHE,\n                Output.PARAM_PERSIST,\n                Output.PARAM_REMOTE,\n                Output.PARAM_PUSH,\n                *ANNOTATION_FIELDS,\n            ],\n        )\n\n        yield _get(stage, path, info={}, plot=plt_d or plot, metric=metric, **extra)\n\n\ndef split_file_meta_from_cloud(entry: dict) -> dict:\n    if 
remote_name := entry.pop(Meta.PARAM_REMOTE, None):\n        remote_meta = {}\n        for key in (S3_PARAM_CHECKSUM, HDFS_PARAM_CHECKSUM, Meta.PARAM_VERSION_ID):\n            if value := entry.pop(key, None):\n                remote_meta[key] = value\n\n        if remote_meta:\n            entry[Output.PARAM_CLOUD] = {remote_name: remote_meta}\n    return entry\n\n\ndef merge_file_meta_from_cloud(entry: dict) -> dict:\n    cloud_meta = entry.pop(Output.PARAM_CLOUD, {})\n    if remote_name := first(cloud_meta):\n        entry.update(cloud_meta[remote_name])\n        entry[Meta.PARAM_REMOTE] = remote_name\n    return entry\n\n\ndef _serialize_tree_obj_to_files(obj: Tree) -> list[dict[str, Any]]:\n    key = obj.PARAM_RELPATH\n    return sorted(\n        (\n            {\n                key: posixpath.sep.join(parts),\n                **_serialize_hi_to_dict(hi),\n                **meta.to_dict(),\n            }\n            for parts, meta, hi in obj\n        ),\n        key=itemgetter(key),\n    )\n\n\ndef _serialize_hi_to_dict(hash_info: Optional[HashInfo]) -> dict[str, Any]:\n    if hash_info:\n        if hash_info.name == \"md5-dos2unix\":\n            return {\"md5\": hash_info.value}\n        return hash_info.to_dict()\n    return {}\n\n\nclass OutputDoesNotExistError(DvcException):\n    def __init__(self, path):\n        msg = f\"output '{path}' does not exist\"\n        super().__init__(msg)\n\n\nclass OutputIsNotFileOrDirError(DvcException):\n    def __init__(self, path):\n        msg = f\"output '{path}' is not a file or directory\"\n        super().__init__(msg)\n\n\nclass OutputAlreadyTrackedError(DvcException):\n    def __init__(self, path):\n        msg = f\"\"\" output '{path}' is already tracked by SCM (e.g. 
Git).\n    You can remove it from Git, then add to DVC.\n        To stop tracking from Git:\n            git rm -r --cached '{path}'\n            git commit -m \"stop tracking {path}\" \"\"\"\n        super().__init__(msg)\n\n\nclass OutputIsStageFileError(DvcException):\n    def __init__(self, path):\n        super().__init__(f\"DVC file '{path}' cannot be an output.\")\n\n\nclass OutputIsIgnoredError(DvcException):\n    def __init__(self, result: \"CheckIgnoreResult\"):\n        pi = result.pattern_infos[-1]\n        super().__init__(f\"Path {result.file!r} is ignored by {pi!s}\")\n\n\nclass CheckoutCallback(TqdmCallback):\n    # disable branching for checkouts\n    branch = Callback.branch  # type: ignore[assignment]\n\n\nclass Output:\n    IS_DEPENDENCY = False\n\n    PARAM_PATH = \"path\"\n    PARAM_CACHE = \"cache\"\n    PARAM_FILES = \"files\"\n    PARAM_METRIC = \"metric\"\n    PARAM_METRIC_TYPE = \"type\"\n    PARAM_METRIC_XPATH = \"xpath\"\n    PARAM_PLOT = \"plot\"\n    PARAM_PLOT_TEMPLATE = \"template\"\n    PARAM_PLOT_X = \"x\"\n    PARAM_PLOT_Y = \"y\"\n    PARAM_PLOT_X_LABEL = \"x_label\"\n    PARAM_PLOT_Y_LABEL = \"y_label\"\n    PARAM_PLOT_TITLE = \"title\"\n    PARAM_PLOT_HEADER = \"header\"\n    PARAM_PERSIST = \"persist\"\n    PARAM_REMOTE = \"remote\"\n    PARAM_PUSH = \"push\"\n    PARAM_CLOUD = \"cloud\"\n    PARAM_HASH = \"hash\"\n    PARAM_FS_CONFIG = \"fs_config\"\n\n    DoesNotExistError: type[DvcException] = OutputDoesNotExistError\n    IsNotFileOrDirError: type[DvcException] = OutputIsNotFileOrDirError\n    IsStageFileError: type[DvcException] = OutputIsStageFileError\n    IsIgnoredError: type[DvcException] = OutputIsIgnoredError\n\n    def __init__(  # noqa: PLR0913\n        self,\n        stage,\n        path,\n        info=None,\n        cache=True,\n        metric=False,\n        plot=False,\n        persist=False,\n        desc=None,\n        type=None,  # noqa: A002\n        labels=None,\n        meta=None,\n        remote=None,\n 
       repo=None,\n        fs_config=None,\n        files: Optional[list[dict[str, Any]]] = None,\n        push: bool = True,\n        hash_name: Optional[str] = DEFAULT_ALGORITHM,\n    ):\n        self.annot = Annotation(\n            desc=desc, type=type, labels=labels or [], meta=meta or {}\n        )\n        self.repo: Optional[Repo] = stage.repo if not repo and stage else repo\n        meta_d = merge_file_meta_from_cloud(info or {})\n        meta = Meta.from_dict(meta_d)\n        # NOTE: when version_aware is not passed into get_cloud_fs, it will be\n        # set based on whether or not path is versioned\n        fs_kwargs = {}\n        if meta.version_id or files:\n            fs_kwargs[\"version_aware\"] = True\n\n        self.def_fs_config = fs_config\n        if fs_config is not None:\n            fs_kwargs.update(**fs_config)\n\n        fs_cls, fs_config, fs_path = get_cloud_fs(\n            self.repo.config if self.repo else {},\n            url=path,\n            **fs_kwargs,\n        )\n        self.fs = fs_cls(**fs_config)\n\n        if (\n            self.fs.protocol == \"local\"\n            and stage\n            and isinstance(stage.repo.fs, LocalFileSystem)\n            and path_isin(path, stage.repo.root_dir)\n        ):\n            self.def_path: str = relpath(path, stage.wdir)\n            self.fs = stage.repo.fs\n        else:\n            self.def_path = path\n\n        if (\n            self.repo\n            and self.fs.protocol == \"local\"\n            and not self.fs.isabs(self.def_path)\n        ):\n            self.fs = self.repo.fs\n\n        self._validate_output_path(path, stage)\n        # This output (and dependency) objects have too many paths/urls\n        # here is a list and comments:\n        #\n        #   .def_path - path from definition in DVC file\n        #   .fspath - local only, resolved\n        #   .__str__ - for presentation purposes, def_path/relpath\n        #\n        # By resolved path, which contains actual 
location,\n        # should be absolute and don't contain remote:// refs.\n        self.stage = stage\n        self.meta = meta\n\n        if files is not None:\n            files = [merge_file_meta_from_cloud(f) for f in files]\n        self.files = files\n        self.use_cache = False if self.IS_DEPENDENCY else cache\n        self.metric = False if self.IS_DEPENDENCY else metric\n        self.plot = False if self.IS_DEPENDENCY else plot\n        self.persist = persist\n        self.can_push = push\n\n        self.fs_path = self._parse_path(self.fs, fs_path)\n        self.obj: Optional[HashFile] = None\n\n        self.remote = remote\n\n        if self.fs.version_aware:\n            _, version_id = self.fs.coalesce_version(\n                self.def_path, self.meta.version_id\n            )\n            self.meta.version_id = version_id\n\n        self.hash_name, self.hash_info = self._compute_hash_info_from_meta(hash_name)\n        self._compute_meta_hash_info_from_files()\n\n    def _compute_hash_info_from_meta(\n        self, hash_name: Optional[str]\n    ) -> tuple[str, HashInfo]:\n        if self.is_in_repo:\n            if hash_name is None:\n                # Legacy 2.x output, use \"md5-dos2unix\" but read \"md5\" from\n                # file meta\n                hash_name = \"md5-dos2unix\"\n                meta_name = \"md5\"\n            else:\n                meta_name = hash_name\n        else:\n            hash_name = meta_name = self.fs.PARAM_CHECKSUM\n        assert hash_name\n\n        hash_info = HashInfo(name=hash_name, value=getattr(self.meta, meta_name, None))\n        return hash_name, hash_info\n\n    def _compute_meta_hash_info_from_files(self) -> None:\n        if self.files:\n            tree = Tree.from_list(self.files, hash_name=self.hash_name)\n            tree.digest(with_meta=True)\n\n            self.hash_info = tree.hash_info\n            self.meta.isdir = True\n            self.meta.nfiles = len(self.files)\n            
self.meta.size = sum(filter(None, (f.get(\"size\") for f in self.files)))\n            self.meta.remote = first(f.get(\"remote\") for f in self.files)\n        elif self.meta.nfiles or (self.hash_info and self.hash_info.isdir):\n            self.meta.isdir = True\n            if not self.hash_info and self.hash_name not in (\"md5\", \"md5-dos2unix\"):\n                md5 = getattr(self.meta, \"md5\", None)\n                if md5:\n                    self.hash_info = HashInfo(\"md5\", md5)\n\n    def _parse_path(self, fs, fs_path):\n        parsed = urlparse(self.def_path)\n        if (\n            parsed.scheme != \"remote\"\n            and self.stage\n            and self.stage.repo.fs == fs\n            and not fs.isabs(fs_path)\n        ):\n            # NOTE: we can path either from command line or .dvc file,\n            # so we should expect both posix and windows style paths.\n            # paths accepts both, i.e. / works everywhere, \\ only on win.\n            #\n            # FIXME: if we have Windows path containing / or posix one with \\\n            # then we have #2059 bug and can't really handle that.\n            fs_path = fs.join(self.stage.wdir, fs_path)\n\n        return fs.abspath(fs.normpath(fs_path))\n\n    def __repr__(self):\n        return f\"{type(self).__name__}: {self.def_path!r}\"\n\n    def __str__(self):\n        if self.fs.protocol != \"local\":\n            return self.def_path\n\n        if (\n            not self.repo\n            or urlparse(self.def_path).scheme == \"remote\"\n            or os.path.isabs(self.def_path)\n        ):\n            return str(self.def_path)\n\n        if not self.fs.isin(self.fs_path, self.repo.root_dir):\n            return self.fs_path\n\n        cur_dir = self.fs.getcwd()\n        if self.fs.isin(cur_dir, self.repo.root_dir):\n            return self.fs.relpath(self.fs_path, cur_dir)\n\n        return self.fs.relpath(self.fs_path, self.repo.root_dir)\n\n    def clear(self):\n        
self.hash_info = HashInfo.from_dict({})\n        self.meta = Meta.from_dict({})\n        self.obj = None\n        self.files = None\n\n    @property\n    def protocol(self):\n        return self.fs.protocol\n\n    @property\n    def is_in_repo(self):\n        if urlparse(self.def_path).scheme == \"remote\":\n            return False\n\n        if self.fs.isabs(self.def_path):\n            return False\n\n        return self.repo and self.fs.isin(self.fs_path, self.repo.root_dir)\n\n    @property\n    def use_scm_ignore(self):\n        if not self.is_in_repo:\n            return False\n\n        return self.use_cache or self.stage.is_repo_import\n\n    @property\n    def cache(self):\n        from dvc.cachemgr import LEGACY_HASH_NAMES\n\n        assert self.is_in_repo\n        assert self.repo\n        odb_name = \"legacy\" if self.hash_name in LEGACY_HASH_NAMES else \"repo\"\n        return getattr(self.repo.cache, odb_name)\n\n    @property\n    def local_cache(self):\n        from dvc.cachemgr import LEGACY_HASH_NAMES\n\n        assert self.repo\n        if self.hash_name in LEGACY_HASH_NAMES:\n            return self.repo.cache.legacy\n        return self.repo.cache.local\n\n    @property\n    def cache_path(self):\n        return self.cache.fs.unstrip_protocol(\n            self.cache.oid_to_path(self.hash_info.value)\n        )\n\n    def get_hash(self):\n        _, hash_info = self._get_hash_meta()\n        return hash_info\n\n    def _build(\n        self, *args, no_progress_bar=False, **kwargs\n    ) -> tuple[\"HashFileDB\", \"Meta\", \"HashFile\"]:\n        from dvc.ui import ui\n\n        with ui.progress(\n            unit=\"file\",\n            desc=f\"Collecting files and computing hashes in {self}\",\n            disable=no_progress_bar,\n        ) as pb:\n            kwargs[\"callback\"] = pb.as_callback()\n            kwargs.setdefault(\"checksum_jobs\", self.fs.hash_jobs)\n            return build(*args, **kwargs)\n\n    def _get_hash_meta(self):\n 
       if self.use_cache:\n            odb = self.cache\n        else:\n            odb = self.local_cache\n        _, meta, obj = self._build(\n            odb,\n            self.fs_path,\n            self.fs,\n            self.hash_name,\n            ignore=self.dvcignore,\n            dry_run=not self.use_cache,\n        )\n        return meta, obj.hash_info\n\n    def get_meta(self) -> Meta:\n        meta, _ = self._get_hash_meta()\n        return meta\n\n    @property\n    def is_dir_checksum(self):\n        return self.hash_info.isdir\n\n    def _is_path_dvcignore(self, path) -> bool:\n        if self.IS_DEPENDENCY or not self.dvcignore:\n            return False\n        return self.dvcignore.is_ignored(self.fs, path, ignore_subrepos=False)\n\n    @property\n    def exists(self):\n        if self._is_path_dvcignore(self.fs_path):\n            return False\n\n        return self.fs.exists(self.fs_path)\n\n    @cached_property\n    def index_key(self) -> tuple[str, \"DataIndexKey\"]:\n        if self.is_in_repo:\n            workspace = \"repo\"\n            assert self.repo\n            key = self.repo.fs.relparts(self.fs_path, self.repo.root_dir)\n        else:\n            workspace = self.fs.protocol\n            no_drive = self.fs.flavour.splitdrive(self.fs_path)[1]\n            key = self.fs.parts(no_drive)[1:]\n        return workspace, key\n\n    def changed_checksum(self):\n        return self.hash_info != self.get_hash()\n\n    def changed_cache(self, filter_info=None):\n        if not self.use_cache or not self.hash_info:\n            return True\n\n        obj = self.get_obj(filter_info=filter_info)\n        if not obj:\n            return True\n\n        try:\n            ocheck(self.cache, obj)\n            return False\n        except (FileNotFoundError, ObjectFormatError):\n            return True\n\n    def changed_meta(self) -> bool:\n        if self.fs.version_aware and self.meta.version_id:\n            return self.meta.version_id != 
self.get_meta().version_id\n        return False\n\n    def workspace_status(self) -> dict[str, str]:\n        if not self.exists:\n            return {str(self): \"deleted\"}\n\n        if self.changed_checksum():\n            return {str(self): \"modified\"}\n\n        if not self.hash_info:\n            return {str(self): \"new\"}\n\n        return {}\n\n    def status(self) -> dict[str, str]:\n        if self.hash_info and self.use_cache and self.changed_cache():\n            return {str(self): \"not in cache\"}\n\n        return self.workspace_status()\n\n    def changed(self) -> bool:\n        status = self.status()\n        logger.debug(str(status))\n        return bool(status)\n\n    @property\n    def dvcignore(self) -> Optional[\"DvcIgnoreFilter\"]:\n        if self.fs.protocol == \"local\":\n            assert self.repo\n            return self.repo.dvcignore\n        return None\n\n    @property\n    def is_empty(self) -> bool:\n        return self.fs.is_empty(self.fs_path)\n\n    def isdir(self) -> bool:\n        if self._is_path_dvcignore(self.fs_path):\n            return False\n        return self.fs.isdir(self.fs_path)\n\n    def isfile(self) -> bool:\n        if self._is_path_dvcignore(self.fs_path):\n            return False\n        return self.fs.isfile(self.fs_path)\n\n    def ignore(self) -> None:\n        if not self.use_scm_ignore:\n            return\n\n        assert self.repo\n        if self.repo.scm.is_tracked(self.fspath):\n            raise OutputAlreadyTrackedError(self)\n\n        self.repo.scm_context.ignore(self.fspath)\n\n    def ignore_remove(self) -> None:\n        if not self.use_scm_ignore:\n            return\n\n        assert self.repo\n        self.repo.scm_context.ignore_remove(self.fspath)\n\n    def save(self) -> None:\n        if self.use_cache and not self.is_in_repo:\n            raise DvcException(\n                f\"Saving cached external output {self!s} is not supported \"\n                \"since DVC 3.0. 
See \"\n                f\"{format_link('https://dvc.org/doc/user-guide/upgrade')} \"\n                \"for more info.\"\n            )\n\n        if not self.exists:\n            raise self.DoesNotExistError(self)\n\n        if not self.isfile() and not self.isdir():\n            raise self.IsNotFileOrDirError(self)\n\n        if self.is_empty:\n            logger.warning(\"'%s' is empty.\", self)\n\n        self.ignore()\n\n        if self.metric:\n            self.verify_metric()\n\n        self.update_legacy_hash_name()\n        if self.use_cache:\n            _, self.meta, self.obj = self._build(\n                self.cache,\n                self.fs_path,\n                self.fs,\n                self.hash_name,\n                ignore=self.dvcignore,\n            )\n        else:\n            _, self.meta, self.obj = self._build(\n                self.local_cache,\n                self.fs_path,\n                self.fs,\n                self.hash_name,\n                ignore=self.dvcignore,\n                dry_run=True,\n            )\n            if not self.IS_DEPENDENCY:\n                logger.debug(\"Output '%s' doesn't use cache. 
Skipping saving.\", self)\n\n        self.hash_info = self.obj.hash_info\n        self.files = None\n\n    def update_legacy_hash_name(self, force: bool = False):\n        if self.hash_name == \"md5-dos2unix\" and (force or self.changed_checksum()):\n            self.hash_name = \"md5\"\n\n    def set_exec(self) -> None:\n        if self.isfile() and self.meta.isexec:\n            self.cache.set_exec(self.fs_path)\n\n    def _checkout(self, *args, **kwargs) -> Optional[bool]:\n        from dvc_data.hashfile.checkout import CheckoutError as _CheckoutError\n        from dvc_data.hashfile.checkout import LinkError, PromptError\n\n        kwargs.setdefault(\"ignore\", self.dvcignore)\n        kwargs.setdefault(\"checksum_jobs\", self.fs.hash_jobs)\n        try:\n            return checkout(*args, **kwargs)\n        except PromptError as exc:\n            raise ConfirmRemoveError(exc.path)  # noqa: B904\n        except LinkError as exc:\n            raise CacheLinkError([exc.path])  # noqa: B904\n        except _CheckoutError as exc:\n            raise CheckoutError(exc.paths, {})  # noqa: B904\n\n    def commit(self, filter_info=None, relink=True) -> None:\n        if not self.exists:\n            raise self.DoesNotExistError(self)\n\n        assert self.hash_info\n\n        if self.use_cache:\n            granular = (\n                self.is_dir_checksum and filter_info and filter_info != self.fs_path\n            )\n            hardlink = relink and next(iter(self.cache.cache_types), None) == \"hardlink\"\n            if granular:\n                obj = self._commit_granular_dir(filter_info, hardlink=hardlink)\n            else:\n                staging, _, obj = self._build(\n                    self.cache,\n                    filter_info or self.fs_path,\n                    self.fs,\n                    self.hash_name,\n                    ignore=self.dvcignore,\n                )\n                with TqdmCallback(\n                    desc=f\"Committing {self} 
to cache\",\n                    unit=\"file\",\n                ) as cb:\n                    otransfer(\n                        staging,\n                        self.cache,\n                        {obj.hash_info},\n                        shallow=False,\n                        hardlink=hardlink,\n                        callback=cb,\n                    )\n            if relink:\n                assert self.repo\n                rel = self.fs.relpath(filter_info or self.fs_path)\n                with CheckoutCallback(desc=f\"Checking out {rel}\", unit=\"files\") as cb:\n                    self._checkout(\n                        filter_info or self.fs_path,\n                        self.fs,\n                        obj,\n                        self.cache,\n                        relink=True,\n                        state=self.repo.state,\n                        prompt=prompt.confirm,\n                        progress_callback=cb,\n                        old=obj,\n                    )\n                self.set_exec()\n\n    def _commit_granular_dir(self, filter_info, hardlink) -> Optional[\"HashFile\"]:\n        prefix = self.fs.parts(self.fs.relpath(filter_info, self.fs_path))\n        staging, _, obj = self._build(\n            self.cache, self.fs_path, self.fs, self.hash_name, ignore=self.dvcignore\n        )\n        assert isinstance(obj, Tree)\n        save_obj = obj.filter(prefix)\n        assert isinstance(save_obj, Tree)\n        checkout_obj = save_obj.get_obj(self.cache, prefix)\n        with TqdmCallback(desc=f\"Committing {self} to cache\", unit=\"file\") as cb:\n            otransfer(\n                staging,\n                self.cache,\n                {save_obj.hash_info} | {oid for _, _, oid in save_obj},\n                shallow=True,\n                hardlink=hardlink,\n                callback=cb,\n            )\n        return checkout_obj\n\n    def dumpd(self, **kwargs):  # noqa: C901, PLR0912\n        from dvc.cachemgr import 
LEGACY_HASH_NAMES\n\n        ret: dict[str, Any] = {}\n        with_files = (\n            (not self.IS_DEPENDENCY or kwargs.get(\"datasets\") or self.stage.is_import)\n            and self.hash_info.isdir\n            and (kwargs.get(\"with_files\") or self.files is not None)\n        )\n\n        if not with_files:\n            meta_d = self.meta.to_dict()\n            meta_d.pop(\"isdir\", None)\n            if self.hash_name in LEGACY_HASH_NAMES:\n                # 2.x checksums get serialized with file meta\n                name = \"md5\" if self.hash_name == \"md5-dos2unix\" else self.hash_name\n                ret.update({name: self.hash_info.value})\n            else:\n                ret.update(self.hash_info.to_dict())\n            ret.update(split_file_meta_from_cloud(meta_d))\n\n        if self.is_in_repo:\n            path = self.fs.as_posix(relpath(self.fs_path, self.stage.wdir))\n        else:\n            path = self.def_path\n\n        if self.hash_name not in LEGACY_HASH_NAMES:\n            ret[self.PARAM_HASH] = \"md5\"\n\n        ret[self.PARAM_PATH] = path\n\n        if self.def_fs_config:\n            ret[self.PARAM_FS_CONFIG] = self.def_fs_config\n\n        if not self.IS_DEPENDENCY:\n            ret.update(self.annot.to_dict())\n            if not self.use_cache:\n                ret[self.PARAM_CACHE] = self.use_cache\n\n            if (\n                isinstance(self.metric, dict)\n                and self.PARAM_METRIC_XPATH in self.metric\n                and not self.metric[self.PARAM_METRIC_XPATH]\n            ):\n                del self.metric[self.PARAM_METRIC_XPATH]\n\n            if self.metric:\n                ret[self.PARAM_METRIC] = self.metric\n\n            if self.plot:\n                ret[self.PARAM_PLOT] = self.plot\n\n            if self.persist:\n                ret[self.PARAM_PERSIST] = self.persist\n\n            if self.remote:\n                ret[self.PARAM_REMOTE] = self.remote\n\n            if not 
self.can_push:\n                ret[self.PARAM_PUSH] = self.can_push\n\n        if with_files:\n            obj = self.obj or self.get_obj()\n            if obj:\n                assert isinstance(obj, Tree)\n                ret[self.PARAM_FILES] = [\n                    split_file_meta_from_cloud(f)\n                    for f in _serialize_tree_obj_to_files(obj)\n                ]\n        return ret\n\n    def verify_metric(self):\n        if self.fs.protocol != \"local\":\n            raise DvcException(f\"verify metric is not supported for {self.protocol}\")\n        if not self.metric:\n            return\n\n        if not os.path.exists(self.fs_path):\n            return\n\n        if os.path.isdir(self.fs_path):\n            msg = \"directory '%s' cannot be used as %s.\"\n            logger.debug(msg, str(self), \"metrics\")\n            return\n\n        if not istextfile(self.fs_path, self.fs):\n            raise DvcException(\n                f\"binary file '{self.fs_path}' cannot be used as metrics.\"\n            )\n\n    def get_obj(\n        self, filter_info: Optional[str] = None, **kwargs\n    ) -> Optional[\"HashFile\"]:\n        obj: Optional[HashFile] = None\n        if self.obj:\n            obj = self.obj\n        elif self.files:\n            tree = Tree.from_list(self.files, hash_name=self.hash_name)\n            tree.digest()\n            obj = tree\n        elif self.hash_info:\n            try:\n                obj = oload(self.cache, self.hash_info)\n            except (FileNotFoundError, ObjectFormatError):\n                return None\n        else:\n            return None\n\n        assert obj\n        fs_path = self.fs\n        if filter_info and filter_info != self.fs_path:\n            prefix = fs_path.relparts(filter_info, self.fs_path)\n            assert isinstance(obj, Tree)\n            obj = obj.get_obj(self.cache, prefix)\n\n        return obj\n\n    def checkout(\n        self,\n        force: bool = False,\n        
progress_callback: \"Callback\" = DEFAULT_CALLBACK,\n        relink: bool = False,\n        filter_info: Optional[str] = None,\n        allow_missing: bool = False,\n        **kwargs,\n    ) -> Optional[tuple[bool, Optional[bool]]]:\n        # callback passed act as a aggregate callback.\n        # do not let checkout to call set_size and change progressbar.\n        class CallbackProxy(Callback):\n            def relative_update(self, inc: int = 1) -> None:\n                progress_callback.relative_update(inc)\n                return super().relative_update(inc)\n\n        callback = CallbackProxy()\n        if not self.use_cache:\n            callback.relative_update(self.get_files_number(filter_info))\n            return None\n\n        obj = self.get_obj(filter_info=filter_info)\n        if not obj and (filter_info and filter_info != self.fs_path):\n            # backward compatibility\n            return None\n\n        added = not self.exists\n\n        try:\n            assert self.repo\n            modified = self._checkout(\n                filter_info or self.fs_path,\n                self.fs,\n                obj,\n                self.cache,\n                force=force,\n                progress_callback=callback,\n                relink=relink,\n                state=self.repo.state,\n                prompt=prompt.confirm,\n                **kwargs,\n            )\n        except CheckoutError:\n            if allow_missing:\n                return None\n            raise\n        self.set_exec()\n        return added, False if added else modified\n\n    def remove(self, ignore_remove=False):\n        try:\n            self.fs.remove(self.fs_path, recursive=True)\n        except FileNotFoundError:\n            pass\n        if self.protocol != Schemes.LOCAL:\n            return\n\n        if ignore_remove:\n            self.ignore_remove()\n\n    def move(self, out: \"Output\") -> None:\n        src_exists = self.exists\n        if src_exists:\n     
       self.fs.move(self.fs_path, out.fs_path)\n        else:\n            logger.warning(\"%r missing\", self.fspath)\n\n        if self.protocol == \"local\" and self.use_scm_ignore:\n            assert self.repo\n            self.repo.scm_context.ignore_remove(self.fspath)\n\n        self.def_path = out.def_path\n        self.fs_path = out.fs_path\n        try:\n            self.save()\n            self.commit()\n        except self.DoesNotExistError:\n            self.ignore()\n\n    def transfer(\n        self, source, odb=None, jobs=None, update=False, no_progress_bar=False\n    ):\n        if odb is None:\n            odb = self.cache\n\n        cls, config, from_info = get_cloud_fs(\n            self.repo.config if self.repo else {}, url=source\n        )\n        from_fs = cls(**config)\n\n        # When running import-url --to-remote / add --to-remote/-o ... we\n        # assume that it is unlikely that the odb will contain majority of the\n        # hashes, so we transfer everything as is (even if that file might\n        # already be in the cache) and don't waste an upload to scan the layout\n        # of the source location. 
But when doing update --to-remote, there is\n        # a high probability that the odb might contain some of the hashes, so\n        # we first calculate all the hashes (but don't transfer anything) and\n        # then only update the missing cache files.\n\n        upload = not (update and from_fs.isdir(from_info))\n        jobs = jobs or min((from_fs.jobs, odb.fs.jobs))\n        staging, self.meta, obj = self._build(\n            odb,\n            from_info,\n            from_fs,\n            DEFAULT_ALGORITHM,\n            upload=upload,\n            no_progress_bar=no_progress_bar,\n        )\n        with TqdmCallback(\n            desc=f\"Transferring to {odb.fs.unstrip_protocol(odb.path)}\",\n            unit=\"file\",\n        ) as cb:\n            otransfer(\n                staging,\n                odb,\n                {obj.hash_info},\n                jobs=jobs,\n                hardlink=False,\n                shallow=False,\n                callback=cb,\n            )\n\n        self.hash_info = obj.hash_info\n        self.files = None\n        return obj\n\n    def get_files_number(self, filter_info=None):\n        if not self.use_cache or not self.hash_info:\n            return 0\n\n        if not self.hash_info.isdir:\n            return 1\n\n        if not filter_info or filter_info == self.fs_path:\n            return self.meta.nfiles or 0\n\n        obj = self.get_obj(filter_info=filter_info)\n        return len(obj) if obj else 0\n\n    def unprotect(self):\n        if self.exists and self.use_cache:\n            with TqdmCallback(\n                size=self.meta.nfiles or -1, desc=f\"Unprotecting {self}\"\n            ) as callback:\n                self.cache.unprotect(self.fs_path, callback=callback)\n\n    def get_dir_cache(self, **kwargs) -> Optional[\"Tree\"]:\n        if not self.is_dir_checksum:\n            raise DvcException(\"cannot get dir cache for file checksum\")\n\n        obj = self.cache.get(self.hash_info.value)\n        
try:\n            ocheck(self.cache, obj)\n        except FileNotFoundError:\n            if self.remote:\n                kwargs[\"remote\"] = self.remote\n            with suppress(Exception):\n                assert self.repo\n                self.repo.cloud.pull([obj.hash_info], **kwargs)\n\n        if self.obj:\n            assert isinstance(self.obj, Tree)\n            return self.obj\n\n        try:\n            obj = oload(self.cache, self.hash_info)\n            assert isinstance(obj, Tree)\n        except (FileNotFoundError, ObjectFormatError):\n            obj = None\n\n        self.obj = obj\n        return obj\n\n    def _collect_used_dir_cache(\n        self, remote=None, force=False, jobs=None, filter_info=None\n    ) -> Optional[\"Tree\"]:\n        \"\"\"Fetch dir cache and return used object IDs for this out.\"\"\"\n\n        try:\n            self.get_dir_cache(jobs=jobs, remote=remote)\n        except RemoteMissingDepsError:\n            raise\n        except DvcException:\n            logger.debug(\"failed to pull cache for '%s'\", self)\n\n        try:\n            ocheck(self.cache, self.cache.get(self.hash_info.value))\n        except FileNotFoundError:\n            msg = (\n                \"Missing cache for directory '{}'. \"\n                \"Cache for files inside will be lost. \"\n                \"Would you like to continue? 
Use '-f' to force.\"\n            )\n            if not force and not prompt.confirm(msg.format(self.fs_path)):\n                raise CollectCacheError(  # noqa: B904\n                    \"unable to fully collect used cache\"\n                    f\" without cache for directory '{self}'\"\n                )\n            return None\n\n        obj = self.get_obj()\n        assert obj is None or isinstance(obj, Tree)\n        if filter_info and filter_info != self.fs_path:\n            assert obj\n            prefix = self.fs.parts(self.fs.relpath(filter_info, self.fs_path))\n            return obj.filter(prefix)\n        return obj\n\n    def get_used_objs(  # noqa: PLR0911\n        self, **kwargs\n    ) -> dict[Optional[\"HashFileDB\"], set[\"HashInfo\"]]:\n        \"\"\"Return filtered set of used object IDs for this out.\"\"\"\n        from dvc.cachemgr import LEGACY_HASH_NAMES\n\n        if not self.use_cache:\n            return {}\n\n        push: bool = kwargs.pop(\"push\", False)\n        if self.stage.is_repo_import:\n            return {}\n\n        if push and not self.can_push:\n            return {}\n\n        if not self.hash_info:\n            msg = (\n                f\"Output '{self}'({self.stage}) is missing version info. \"\n                \"Cache for it will not be collected. 
\"\n                \"Use `dvc repro` to get your pipeline up to date.\"\n            )\n            if self.exists:\n                msg += (\n                    \"\\n\"\n                    f\"You can also use `dvc commit {self.stage.addressing}` \"\n                    f\"to associate existing '{self}' with {self.stage}.\"\n                )\n            logger.warning(msg)\n            return {}\n\n        obj: Optional[HashFile]\n        if self.is_dir_checksum:\n            obj = self._collect_used_dir_cache(**kwargs)\n        else:\n            obj = self.get_obj(filter_info=kwargs.get(\"filter_info\"))\n            if not obj:\n                obj = self.cache.get(self.hash_info.value)\n\n        if not obj:\n            return {}\n\n        if self.remote:\n            assert self.repo\n            remote_odb = self.repo.cloud.get_remote_odb(\n                name=self.remote, hash_name=self.hash_name\n            )\n            other_odb = self.repo.cloud.get_remote_odb(\n                name=self.remote,\n                hash_name=(\n                    \"md5\" if self.hash_name in LEGACY_HASH_NAMES else \"md5-dos2unix\"\n                ),\n            )\n            return {remote_odb: self._named_obj_ids(obj), other_odb: set()}\n        return {None: self._named_obj_ids(obj)}\n\n    def _named_obj_ids(self, obj):\n        name = str(self)\n        obj.hash_info.obj_name = name\n        oids = {obj.hash_info}\n        if isinstance(obj, Tree):\n            for key, _, oid in obj:\n                oid.obj_name = self.fs.sep.join([name, *key])\n                oids.add(oid)\n        return oids\n\n    def _validate_output_path(self, path, stage=None):\n        from dvc.dvcfile import is_valid_filename\n\n        if is_valid_filename(path):\n            raise self.IsStageFileError(path)\n\n        if stage:\n            abs_path = os.path.join(stage.wdir, path)\n            if self._is_path_dvcignore(abs_path):\n                check: CheckIgnoreResult = 
stage.repo.dvcignore.check_ignore(abs_path)\n                if check.match:\n                    raise self.IsIgnoredError(check)\n\n    def _check_can_merge(self, out):\n        if self.protocol != out.protocol:\n            raise MergeError(\"unable to auto-merge outputs of different types\")\n\n        my = self.dumpd()\n        other = out.dumpd()\n\n        ignored = [\n            self.hash_name,\n            Meta.PARAM_SIZE,\n            Meta.PARAM_NFILES,\n            Output.PARAM_HASH,\n        ]\n\n        for opt in ignored:\n            my.pop(opt, None)\n            other.pop(opt, None)\n\n        if my != other or self.hash_name != out.hash_name:\n            raise MergeError(\"unable to auto-merge outputs with different options\")\n\n        if not out.is_dir_checksum:\n            raise MergeError(\"unable to auto-merge outputs that are not directories\")\n\n    def merge(self, ancestor, other, allowed=None):\n        from dvc_data.hashfile.tree import MergeError as TreeMergeError\n        from dvc_data.hashfile.tree import merge\n\n        assert other\n\n        if ancestor:\n            self._check_can_merge(ancestor)\n            ancestor_info = ancestor.hash_info\n        else:\n            ancestor_info = None\n\n        self._check_can_merge(self)\n        self._check_can_merge(other)\n\n        try:\n            merged = merge(\n                self.cache,\n                ancestor_info,\n                self.hash_info,\n                other.hash_info,\n                allowed=allowed,\n            )\n        except TreeMergeError as exc:\n            raise MergeError(str(exc)) from exc\n\n        self.cache.add(merged.path, merged.fs, merged.oid)\n\n        self.hash_info = merged.hash_info\n        self.files = None\n        self.meta = Meta(size=du(self.cache, merged), nfiles=len(merged))\n\n    def unstage(self, path: str) -> tuple[\"Meta\", \"Tree\"]:\n        from pygtrie import Trie\n\n        rel_key = 
tuple(self.fs.parts(self.fs.relpath(path, self.fs_path)))\n\n        if self.hash_info:\n            tree = self.get_dir_cache()\n            if tree is None:\n                raise DvcException(f\"could not read {self.hash_info.value!r}\")\n        else:\n            tree = Tree()\n\n        trie = tree.as_trie()\n        assert isinstance(trie, Trie)\n\n        try:\n            del trie[rel_key:]  # type: ignore[misc]\n        except KeyError:\n            raise FileNotFoundError(  # noqa: B904\n                errno.ENOENT,\n                os.strerror(errno.ENOENT),\n                path,\n            )\n\n        new = tree.from_trie(trie)\n        new.digest()\n        return Meta(nfiles=len(new), isdir=True), new\n\n    def apply(\n        self,\n        path: str,\n        obj: Union[\"Tree\", \"HashFile\"],\n        meta: \"Meta\",\n    ) -> tuple[\"Meta\", \"Tree\"]:\n        from pygtrie import Trie\n\n        append_only = True\n        rel_key = tuple(self.fs.parts(self.fs.relpath(path, self.fs_path)))\n\n        if self.hash_info:\n            tree = self.get_dir_cache()\n            if tree is None:\n                raise DvcException(f\"could not read {self.hash_info.value!r}\")\n        else:\n            tree = Tree()\n\n        trie = tree.as_trie()\n        assert isinstance(trie, Trie)\n\n        try:\n            del trie[rel_key:]  # type: ignore[misc]\n        except KeyError:\n            pass\n        else:\n            append_only = False\n\n        items = {}\n        if isinstance(obj, Tree):\n            items = {(*rel_key, *key): (m, o) for key, m, o in obj}\n        else:\n            items = {rel_key: (meta, obj.hash_info)}\n        trie.update(items)\n\n        new = Tree.from_trie(trie)\n        new.digest()\n\n        size = self.meta.size if self.meta and self.meta.size else None\n        if append_only and size and meta.size is not None:\n            # if files were only appended, we can sum to the existing size\n            
size += meta.size\n        elif self.hash_info and self.hash_info == new.hash_info:\n            # if hashes are same, sizes must have been the same\n            size = self.meta.size\n        else:\n            size = None\n\n        meta = Meta(nfiles=len(new), size=size, isdir=True)\n        return meta, new\n\n    def add(  # noqa: C901\n        self, path: Optional[str] = None, no_commit: bool = False, relink: bool = True\n    ) -> Optional[\"HashFile\"]:\n        path = path or self.fs_path\n        if self.hash_info and not self.is_dir_checksum and self.fs_path != path:\n            raise DvcException(\n                f\"Cannot modify '{self}' which is being tracked as a file\"\n            )\n\n        assert self.repo\n        self.update_legacy_hash_name()\n        cache = self.cache if self.use_cache else self.local_cache\n        assert isinstance(cache, HashFileDB)\n\n        new: HashFile\n        try:\n            assert self.hash_name\n            staging, meta, obj = self._build(\n                cache,\n                path,\n                self.fs,\n                self.hash_name,\n                ignore=self.dvcignore,\n                dry_run=not self.use_cache,\n            )\n        except FileNotFoundError as exc:\n            if not self.exists:\n                raise self.DoesNotExistError(self) from exc\n            if not self.is_dir_checksum:\n                raise\n\n            meta, new = self.unstage(path)\n            staging, obj = None, None\n        else:\n            assert obj\n            assert staging\n            if self.fs_path != path:\n                meta, new = self.apply(path, obj, meta)\n                add_update_tree(staging, new)\n            else:\n                new = obj\n\n        self.obj = new\n        self.hash_info = self.obj.hash_info\n        self.meta = meta\n        self.files = None\n        self.ignore()\n\n        if no_commit or not self.use_cache:\n            return obj\n\n        if 
isinstance(new, Tree):\n            add_update_tree(cache, new)\n\n        if not obj:\n            return obj\n\n        assert staging\n        assert obj.hash_info\n\n        hardlink = relink and next(iter(self.cache.cache_types), None) == \"hardlink\"\n        with TqdmCallback(desc=f\"Adding {self} to cache\", unit=\"file\") as cb:\n            otransfer(\n                staging,\n                self.cache,\n                {obj.hash_info},\n                hardlink=hardlink,\n                shallow=False,\n                callback=cb,\n            )\n\n        if relink:\n            with CheckoutCallback(\n                desc=f\"Checking out {path}\", unit=\"files\"\n            ) as callback:\n                self._checkout(\n                    path,\n                    self.fs,\n                    obj,\n                    self.cache,\n                    relink=True,\n                    state=self.repo.state,\n                    prompt=prompt.confirm,\n                    progress_callback=callback,\n                    old=obj,\n                )\n            self.set_exec()\n        return obj\n\n    @property\n    def fspath(self):\n        return self.fs_path\n\n    @property\n    def is_decorated(self) -> bool:\n        return self.is_metric or self.is_plot\n\n    @property\n    def is_metric(self) -> bool:\n        return bool(self.metric)\n\n    @property\n    def is_plot(self) -> bool:\n        return bool(self.plot)\n\n    def restore_fields(self, other: \"Output\"):\n        \"\"\"Restore attributes that need to be preserved when serialized.\"\"\"\n        self.annot = other.annot\n        self.remote = other.remote\n        self.can_push = other.can_push\n\n    def _get_versioned_meta(\n        self,\n    ) -> Optional[\n        tuple[\"HashInfo\", Optional[\"Meta\"], Optional[Union[\"HashFile\", \"Tree\"]]]\n    ]:\n        if self.files is not None or (\n            self.meta is not None and self.meta.version_id is not None\n        
):\n            old_obj = self.obj if self.obj is not None else self.get_obj()\n            return self.hash_info, self.meta, old_obj\n        return None\n\n    def merge_version_meta(\n        self,\n        old_hi: \"HashInfo\",\n        old_meta: Optional[\"Meta\"],\n        old_obj: Optional[Union[\"HashFile\", \"Tree\"]],\n    ):\n        \"\"\"Merge version meta for files which are unchanged from other.\"\"\"\n        if not self.hash_info:\n            return None\n        if self.hash_info.isdir:\n            return self._merge_dir_version_meta(old_hi, old_obj)\n        if self.hash_info != old_hi:\n            return None\n        self.meta = old_meta\n        return None\n\n    def _merge_dir_version_meta(\n        self, old_hi: \"HashInfo\", old_obj: Optional[Union[\"HashFile\", \"Tree\"]]\n    ):\n        from dvc_data.hashfile.tree import update_meta\n\n        if not self.obj or not old_hi.isdir:\n            return\n        assert isinstance(self.obj, Tree)\n        assert isinstance(old_obj, Tree)\n        updated = update_meta(self.obj, old_obj)\n        assert updated.hash_info == self.obj.hash_info\n        self.obj = updated\n        self.files = updated.as_list(with_meta=True)\n\n\nMETA_SCHEMA = {\n    Meta.PARAM_SIZE: int,\n    Meta.PARAM_NFILES: int,\n    Meta.PARAM_ISEXEC: bool,\n    Meta.PARAM_VERSION_ID: str,\n}\n\nCLOUD_SCHEMA = vol.All({str: META_SCHEMA | CHECKSUMS_SCHEMA}, vol.Length(max=1))\n\nARTIFACT_SCHEMA: dict[Any, Any] = {\n    **CHECKSUMS_SCHEMA,\n    **META_SCHEMA,\n    Output.PARAM_PATH: str,\n    Output.PARAM_PERSIST: bool,\n    Output.PARAM_CLOUD: CLOUD_SCHEMA,\n    Output.PARAM_HASH: str,\n}\n\nDIR_FILES_SCHEMA: dict[Any, Any] = {\n    **CHECKSUMS_SCHEMA,\n    **META_SCHEMA,\n    vol.Required(Tree.PARAM_RELPATH): str,\n    Output.PARAM_CLOUD: CLOUD_SCHEMA,\n}\n\nSCHEMA = {\n    **ARTIFACT_SCHEMA,\n    **ANNOTATION_SCHEMA,\n    Output.PARAM_CACHE: bool,\n    Output.PARAM_REMOTE: str,\n    Output.PARAM_PUSH: bool,\n    
Output.PARAM_FILES: [DIR_FILES_SCHEMA],\n    Output.PARAM_FS_CONFIG: dict,\n}\n"
  },
  {
    "path": "dvc/parsing/__init__.py",
    "content": "import logging\nimport os\nfrom collections.abc import Mapping, Sequence\nfrom copy import deepcopy\nfrom itertools import product\nfrom typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union\n\nfrom funcy import collecting, first, isa, join, reraise\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.parsing.interpolate import ParseError\nfrom dvc.utils.objects import cached_property\n\nfrom .context import (\n    Context,\n    ContextError,\n    KeyNotInContext,\n    MergeError,\n    Node,\n    VarsAlreadyLoaded,\n)\nfrom .interpolate import (\n    check_expression,\n    check_recursive_parse_errors,\n    is_interpolated_string,\n    recurse,\n    to_str,\n)\n\nif TYPE_CHECKING:\n    from typing import NoReturn\n\n    from dvc.repo import Repo\n    from dvc.types import DictStrAny\n\n    from .context import SeqOrMap\n\n\nlogger = logger.getChild(__name__)\n\nVARS_KWD = \"vars\"\nWDIR_KWD = \"wdir\"\n\nARTIFACTS_KWD = \"artifacts\"\nDATASETS_KWD = \"datasets\"\nMETRICS_KWD = \"metrics\"\nPARAMS_KWD = \"params\"\nPLOTS_KWD = \"plots\"\nSTAGES_KWD = \"stages\"\n\nFOREACH_KWD = \"foreach\"\nMATRIX_KWD = \"matrix\"\nDO_KWD = \"do\"\n\nDEFAULT_PARAMS_FILE = \"params.yaml\"\n\nJOIN = \"@\"\n\n\nclass ResolveError(DvcException):\n    pass\n\n\nclass EntryNotFound(DvcException):\n    pass\n\n\ndef _format_preamble(msg: str, path: str, spacing: str = \" \") -> str:\n    return f\"failed to parse {msg} in '{path}':{spacing}\"\n\n\ndef format_and_raise(exc: Exception, msg: str, path: str) -> \"NoReturn\":\n    spacing = (\n        \"\\n\" if isinstance(exc, (ParseError, MergeError, VarsAlreadyLoaded)) else \" \"\n    )\n    message = _format_preamble(msg, path, spacing) + str(exc)\n\n    # FIXME: cannot reraise because of how we log \"cause\" of the exception\n    # the error message is verbose, hence need control over the spacing\n    _reraise_err(ResolveError, message, from_exc=exc)\n\n\ndef _reraise_err(\n    exc_cls: 
type[Exception], *args, from_exc: Optional[Exception] = None\n) -> \"NoReturn\":\n    err = exc_cls(*args)\n    if from_exc and logger.isEnabledFor(logging.DEBUG):\n        raise err from from_exc\n    raise err\n\n\ndef check_syntax_errors(\n    definition: \"DictStrAny\", name: str, path: str, where: str = \"stages\"\n):\n    for key, d in definition.items():\n        try:\n            check_recursive_parse_errors(d)\n        except ParseError as exc:\n            format_and_raise(exc, f\"'{where}.{name}.{key}'\", path)\n\n\ndef is_map_or_seq(data: Any) -> bool:\n    _is_map_or_seq = isa(Mapping, Sequence)\n    return not isinstance(data, str) and _is_map_or_seq(data)\n\n\ndef split_group_name(name: str) -> tuple[str, Optional[str]]:\n    group, *keys = name.rsplit(JOIN, maxsplit=1)\n    return group, first(keys)\n\n\ndef check_interpolations(data: \"DictStrAny\", where: str, path: str):\n    def func(s: \"DictStrAny\") -> None:\n        if is_interpolated_string(s):\n            raise ResolveError(\n                _format_preamble(f\"'{where}'\", path) + \"interpolating is not allowed\"\n            )\n\n    return recurse(func)(data)\n\n\nDefinition = Union[\"ForeachDefinition\", \"EntryDefinition\", \"MatrixDefinition\"]\n\n\ndef make_definition(\n    resolver: \"DataResolver\", name: str, definition: \"DictStrAny\", **kwargs\n) -> Definition:\n    args = resolver, resolver.context, name, definition\n    if MATRIX_KWD in definition:\n        return MatrixDefinition(*args, **kwargs)\n    if FOREACH_KWD in definition:\n        return ForeachDefinition(*args, **kwargs)\n    return EntryDefinition(*args, **kwargs)\n\n\nclass DataResolver:\n    def __init__(self, repo: \"Repo\", wdir: str, d: dict):\n        self.fs = fs = repo.fs\n        self.parsing_config = repo.config.get(\"parsing\", {})\n\n        if os.path.isabs(wdir):\n            wdir = fs.relpath(wdir)\n            wdir = \"\" if wdir == os.curdir else wdir\n\n        self.wdir = wdir\n        
self.relpath = fs.normpath(fs.join(self.wdir, \"dvc.yaml\"))\n\n        vars_ = d.get(VARS_KWD, [])\n        check_interpolations(vars_, VARS_KWD, self.relpath)\n        self.context: Context = Context()\n\n        try:\n            args = fs, vars_, wdir  # load from `vars` section\n            self.context.load_from_vars(*args, default=DEFAULT_PARAMS_FILE)\n        except ContextError as exc:\n            format_and_raise(exc, \"'vars'\", self.relpath)\n\n        # we use `tracked_vars` to keep a dictionary of used variables\n        # by the interpolated entries.\n        self.tracked_vars: dict[str, Mapping] = {}\n\n        stages_data = d.get(STAGES_KWD, {})\n        # we wrap the definitions into:\n        # ForeachDefinition, MatrixDefinition, and EntryDefinition\n        # that helps us to optimize, cache and selectively load each one of\n        # them as we need, and simplify all of this DSL/parsing logic.\n        self.definitions: dict[str, Definition] = {\n            name: make_definition(self, name, definition)\n            for name, definition in stages_data.items()\n        }\n\n        self.artifacts = [\n            ArtifactDefinition(self, self.context, name, definition, ARTIFACTS_KWD)\n            for name, definition in d.get(ARTIFACTS_KWD, {}).items()\n        ]\n        self.datasets = [\n            TopDefinition(self, self.context, str(i), definition, DATASETS_KWD)\n            for i, definition in enumerate(d.get(DATASETS_KWD, []))\n        ]\n        self.metrics = [\n            TopDefinition(self, self.context, str(i), definition, METRICS_KWD)\n            for i, definition in enumerate(d.get(METRICS_KWD, []))\n        ]\n        self.params = [\n            TopDefinition(self, self.context, str(i), definition, PARAMS_KWD)\n            for i, definition in enumerate(d.get(PARAMS_KWD, []))\n        ]\n        self.plots = [\n            TopDefinition(self, self.context, str(i), definition, PLOTS_KWD)\n            for i, definition in 
enumerate(d.get(PLOTS_KWD, []))\n        ]\n\n    def resolve_one(self, name: str):\n        group, key = split_group_name(name)\n\n        if not self._has_group_and_key(group, key):\n            raise EntryNotFound(f\"Could not find '{name}'\")\n\n        # all of the checks for `key` not being None for\n        # `ForeachDefinition`/`MatrixDefinition`\n        # and/or `group` not existing in the `interim`, etc. should be\n        # handled by the `self.has_key()` above.\n        definition = self.definitions[group]\n        if isinstance(definition, EntryDefinition):\n            return definition.resolve()\n\n        assert key\n        return definition.resolve_one(key)\n\n    def resolve(self):\n        \"\"\"Used for testing purposes, otherwise use resolve_one().\"\"\"\n        data = join(map(self.resolve_one, self.get_keys()))\n        logger.trace(\"Resolved dvc.yaml:\\n%s\", data)\n        return {STAGES_KWD: data}\n\n    # Top-level sections are eagerly evaluated, whereas stages are lazily evaluated,\n    # one-by-one.\n\n    def resolve_artifacts(self) -> dict[str, Optional[dict[str, Any]]]:\n        d: dict[str, Optional[dict[str, Any]]] = {}\n        for item in self.artifacts:\n            d.update(item.resolve())\n        return d\n\n    def resolve_datasets(self) -> list[dict[str, Any]]:\n        return [item.resolve() for item in self.datasets]\n\n    def resolve_metrics(self) -> list[str]:\n        return [item.resolve() for item in self.metrics]\n\n    def resolve_params(self) -> list[str]:\n        return [item.resolve() for item in self.params]\n\n    def resolve_plots(self) -> list[Any]:\n        return [item.resolve() for item in self.plots]\n\n    def has_key(self, key: str):\n        return self._has_group_and_key(*split_group_name(key))\n\n    def _has_group_and_key(self, group: str, key: Optional[str] = None):\n        try:\n            definition = self.definitions[group]\n        except KeyError:\n            return False\n\n        
if not isinstance(definition, (ForeachDefinition, MatrixDefinition)):\n            return key is None\n        return key is not None and definition.has_member(key)\n\n    @collecting\n    def get_keys(self):\n        for name, definition in self.definitions.items():\n            if isinstance(definition, (ForeachDefinition, MatrixDefinition)):\n                yield from definition.get_generated_names()\n                continue\n            yield name\n\n    def track_vars(self, name: str, vars_) -> None:\n        self.tracked_vars[name] = vars_\n\n\nclass EntryDefinition:\n    def __init__(\n        self,\n        resolver: DataResolver,\n        context: Context,\n        name: str,\n        definition: \"DictStrAny\",\n        where: str = STAGES_KWD,\n    ):\n        self.resolver = resolver\n        self.wdir = self.resolver.wdir\n        self.relpath = self.resolver.relpath\n        self.context = context\n        self.name = name\n        self.definition = definition\n        self.where = where\n\n    def _resolve_wdir(\n        self, context: Context, name: str, wdir: Optional[str] = None\n    ) -> str:\n        if not wdir:\n            return self.wdir\n\n        try:\n            wdir = to_str(context.resolve_str(wdir))\n        except (ContextError, ParseError) as exc:\n            format_and_raise(exc, f\"'{self.where}.{name}.wdir'\", self.relpath)\n        return self.resolver.fs.join(self.wdir, wdir)\n\n    def resolve(self, **kwargs):\n        try:\n            return self.resolve_stage(**kwargs)\n        except ContextError as exc:\n            format_and_raise(exc, f\"stage '{self.name}'\", self.relpath)\n\n    def resolve_stage(self, skip_checks: bool = False) -> \"DictStrAny\":\n        context = self.context\n        name = self.name\n        if not skip_checks:\n            # we can check for syntax errors as we go for interpolated entries,\n            # but for foreach and matrix generated ones, once is enough, which it does\n            # 
that itself. See `ForeachDefinition.template`\n            # and `MatrixDefinition.template`.\n            check_syntax_errors(self.definition, name, self.relpath)\n\n        # we need to pop vars from generated/evaluated data\n        definition = deepcopy(self.definition)\n\n        wdir = self._resolve_wdir(context, name, definition.get(WDIR_KWD))\n        vars_ = definition.pop(VARS_KWD, [])\n        # FIXME: Should `vars` be templatized?\n        check_interpolations(vars_, f\"{self.where}.{name}.vars\", self.relpath)\n        if vars_:\n            # Optimization: Lookahead if it has any vars, if it does not, we\n            # don't need to clone them.\n            context = Context.clone(context)\n\n        try:\n            fs = self.resolver.fs\n            context.load_from_vars(fs, vars_, wdir, stage_name=name)\n        except VarsAlreadyLoaded as exc:\n            format_and_raise(exc, f\"'{self.where}.{name}.vars'\", self.relpath)\n\n        logger.trace(\"Context during resolution of stage %s:\\n%s\", name, context)\n\n        with context.track() as tracked_data:\n            # NOTE: we do not pop \"wdir\", and resolve it again\n            # this does not affect anything and is done to try to\n            # track the source of `wdir` interpolation.\n            # This works because of the side-effect that we do not\n            # allow overwriting and/or str interpolating complex objects.\n            # Fix if/when those assumptions are no longer valid.\n            resolved = {\n                key: self._resolve(context, value, key, skip_checks)\n                for key, value in definition.items()\n            }\n\n        self.resolver.track_vars(name, tracked_data)\n        return {name: resolved}\n\n    def _resolve(\n        self, context: \"Context\", value: Any, key: str, skip_checks: bool\n    ) -> \"DictStrAny\":\n        try:\n            return context.resolve(\n                value,\n                
skip_interpolation_checks=skip_checks,\n                key=key,\n                config=self.resolver.parsing_config,\n            )\n        except (ParseError, KeyNotInContext) as exc:\n            format_and_raise(exc, f\"'{self.where}.{self.name}.{key}'\", self.relpath)\n\n\nclass IterationPair(NamedTuple):\n    key: str = \"key\"\n    value: str = \"item\"\n\n\nclass ForeachDefinition:\n    def __init__(\n        self,\n        resolver: DataResolver,\n        context: Context,\n        name: str,\n        definition: \"DictStrAny\",\n        where: str = STAGES_KWD,\n    ):\n        self.resolver = resolver\n        self.relpath = self.resolver.relpath\n        self.context = context\n        self.name = name\n\n        assert DO_KWD in definition\n        assert MATRIX_KWD not in definition\n        self.foreach_data = definition[FOREACH_KWD]\n        self._template = definition[DO_KWD]\n\n        self.pair = IterationPair()\n        self.where = where\n\n    @cached_property\n    def template(self):\n        # optimization: check for syntax errors only once for `foreach` stages\n        check_syntax_errors(self._template, self.name, self.relpath)\n        return self._template\n\n    @cached_property\n    def resolved_iterable(self):\n        return self._resolve_foreach_data()\n\n    def _resolve_foreach_data(self) -> \"SeqOrMap\":\n        try:\n            iterable = self.context.resolve(self.foreach_data, unwrap=False)\n        except (ContextError, ParseError) as exc:\n            format_and_raise(exc, f\"'{self.where}.{self.name}.foreach'\", self.relpath)\n\n        # foreach data can be a resolved dictionary/list.\n        self._check_is_map_or_seq(iterable)\n        # foreach stages will have `item` and `key` added to the context\n        # so, we better warn them if they have them already in the context\n        # from the global vars. 
We could add them in `set_temporarily`, but\n        # that'd make it display for each iteration.\n        self._warn_if_overwriting(self._inserted_keys(iterable))\n        return iterable\n\n    def _check_is_map_or_seq(self, iterable):\n        if not is_map_or_seq(iterable):\n            node = iterable.value if isinstance(iterable, Node) else iterable\n            typ = type(node).__name__\n            raise ResolveError(\n                f\"failed to resolve '{self.where}.{self.name}.foreach'\"\n                f\" in '{self.relpath}': expected list/dictionary, got \" + typ\n            )\n\n    def _warn_if_overwriting(self, keys: list[str]):\n        warn_for = [k for k in keys if k in self.context]\n        if warn_for:\n            linking_verb = \"is\" if len(warn_for) == 1 else \"are\"\n            logger.warning(\n                (\n                    \"%s %s already specified, \"\n                    \"will be overwritten for stages generated from '%s'\"\n                ),\n                \" and \".join(warn_for),\n                linking_verb,\n                self.name,\n            )\n\n    def _inserted_keys(self, iterable) -> list[str]:\n        keys = [self.pair.value]\n        if isinstance(iterable, Mapping):\n            keys.append(self.pair.key)\n        return keys\n\n    @cached_property\n    def normalized_iterable(self):\n        \"\"\"Convert sequence to Mapping with keys normalized.\"\"\"\n        iterable = self.resolved_iterable\n        if isinstance(iterable, Mapping):\n            return {to_str(k): v for k, v in iterable.items()}\n\n        assert isinstance(iterable, Sequence)\n        if any(map(is_map_or_seq, iterable)):\n            # if the list contains composite data, index are the keys\n            return {to_str(idx): value for idx, value in enumerate(iterable)}\n\n        # for simple lists, eg: [\"foo\", \"bar\"],  contents are the key itself\n        return {to_str(value): value for value in iterable}\n\n    def 
has_member(self, key: str) -> bool:\n        return key in self.normalized_iterable\n\n    def get_generated_names(self):\n        return list(map(self._generate_name, self.normalized_iterable))\n\n    def _generate_name(self, key: str) -> str:\n        return f\"{self.name}{JOIN}{key}\"\n\n    def resolve_all(self) -> \"DictStrAny\":\n        return join(map(self.resolve_one, self.normalized_iterable))\n\n    def resolve_one(self, key: str) -> \"DictStrAny\":\n        return self._each_iter(key)\n\n    def _each_iter(self, key: str) -> \"DictStrAny\":\n        err_message = f\"Could not find '{key}' in foreach group '{self.name}'\"\n        with reraise(KeyError, EntryNotFound(err_message)):\n            value = self.normalized_iterable[key]\n\n        # NOTE: we need to use resolved iterable/foreach-data,\n        # not the normalized ones to figure out whether to make item/key\n        # available\n        inserted = self._inserted_keys(self.resolved_iterable)\n        temp_dict = {self.pair.value: value}\n        key_str = self.pair.key\n        if key_str in inserted:\n            temp_dict[key_str] = key\n\n        with self.context.set_temporarily(temp_dict, reserve=True):\n            # optimization: item and key can be removed on __exit__() as they\n            # are top-level values, and are not merged recursively.\n            # This helps us avoid cloning context, which is slower\n            # (increasing the size of the context might increase\n            # the no. of items to be generated which means more cloning,\n            # i.e. quadratic complexity).\n            generated = self._generate_name(key)\n            entry = EntryDefinition(\n                self.resolver, self.context, generated, self.template\n            )\n            try:\n                # optimization: skip checking for syntax errors on each foreach\n                # generated stages. 
We do it once when accessing template.\n                return entry.resolve_stage(skip_checks=True)\n            except ContextError as exc:\n                format_and_raise(exc, f\"stage '{generated}'\", self.relpath)\n\n\nclass MatrixDefinition:\n    def __init__(\n        self,\n        resolver: DataResolver,\n        context: Context,\n        name: str,\n        definition: \"DictStrAny\",\n        where: str = STAGES_KWD,\n    ):\n        self.resolver = resolver\n        self.relpath = self.resolver.relpath\n        self.context = context\n        self.name = name\n\n        assert MATRIX_KWD in definition\n        assert DO_KWD not in definition\n        assert FOREACH_KWD not in definition\n\n        self._template = definition.copy()\n        self.matrix_data = self._template.pop(MATRIX_KWD)\n\n        self.pair = IterationPair()\n        self.where = where\n\n    @cached_property\n    def template(self) -> \"DictStrAny\":\n        # optimization: check for syntax errors only once for `matrix` stages\n        check_syntax_errors(self._template, self.name, self.relpath)\n        return self._template\n\n    @cached_property\n    def resolved_iterable(self) -> dict[str, list]:\n        return self._resolve_matrix_data()\n\n    def _resolve_matrix_data(self) -> dict[str, list]:\n        try:\n            iterable = self.context.resolve(self.matrix_data, unwrap=False)\n        except (ContextError, ParseError) as exc:\n            format_and_raise(exc, f\"'{self.where}.{self.name}.matrix'\", self.relpath)\n\n        # Matrix entries will have `key` and `item` added to the context.\n        # Warn users if these are already in the context from the global vars.\n        self._warn_if_overwriting([self.pair.key, self.pair.value])\n        return iterable\n\n    def _warn_if_overwriting(self, keys: list[str]):\n        warn_for = [k for k in keys if k in self.context]\n        if warn_for:\n            linking_verb = \"is\" if len(warn_for) == 1 else \"are\"\n 
           logger.warning(\n                (\n                    \"%s %s already specified, \"\n                    \"will be overwritten for stages generated from '%s'\"\n                ),\n                \" and \".join(warn_for),\n                linking_verb,\n                self.name,\n            )\n\n    @cached_property\n    def normalized_iterable(self) -> dict[str, \"DictStrAny\"]:\n        \"\"\"Convert sequence to Mapping with keys normalized.\"\"\"\n        iterable = self.resolved_iterable\n        assert isinstance(iterable, Mapping)\n\n        ret: dict[str, DictStrAny] = {}\n        matrix = {key: enumerate(v) for key, v in iterable.items()}\n        for combination in product(*matrix.values()):\n            d: DictStrAny = {}\n            fragments: list[str] = []\n            for k, (i, v) in zip(matrix.keys(), combination):\n                d[k] = v\n                fragments.append(f\"{k}{i}\" if is_map_or_seq(v) else to_str(v))\n\n            key = \"-\".join(fragments)\n            ret[key] = d\n        return ret\n\n    def has_member(self, key: str) -> bool:\n        return key in self.normalized_iterable\n\n    def get_generated_names(self) -> list[str]:\n        return list(map(self._generate_name, self.normalized_iterable))\n\n    def _generate_name(self, key: str) -> str:\n        return f\"{self.name}{JOIN}{key}\"\n\n    def resolve_all(self) -> \"DictStrAny\":\n        return join(map(self.resolve_one, self.normalized_iterable))\n\n    def resolve_one(self, key: str) -> \"DictStrAny\":\n        return self._each_iter(key)\n\n    def _each_iter(self, key: str) -> \"DictStrAny\":\n        err_message = f\"Could not find '{key}' in matrix group '{self.name}'\"\n        with reraise(KeyError, EntryNotFound(err_message)):\n            value = self.normalized_iterable[key]\n\n        temp_dict = {self.pair.key: key, self.pair.value: value}\n        with self.context.set_temporarily(temp_dict, reserve=True):\n            # optimization: 
item and key can be removed on __exit__() as they\n            # are top-level values, and are not merged recursively.\n            # This helps us avoid cloning context, which is slower\n            # (increasing the size of the context might increase\n            # the no. of items to be generated which means more cloning,\n            # i.e. quadratic complexity).\n            generated = self._generate_name(key)\n            entry = EntryDefinition(\n                self.resolver, self.context, generated, self.template\n            )\n            try:\n                # optimization: skip checking for syntax errors on each matrix\n                # generated stages. We do it once when accessing template.\n                return entry.resolve_stage(skip_checks=True)\n            except ContextError as exc:\n                format_and_raise(exc, f\"stage '{generated}'\", self.relpath)\n\n\nclass TopDefinition:\n    def __init__(\n        self,\n        resolver: DataResolver,\n        context: Context,\n        name: str,\n        definition: \"Any\",\n        where: str,\n    ):\n        self.resolver = resolver\n        self.context = context\n        self.name = name\n        self.definition = definition\n        self.where = where\n        self.relpath = self.resolver.relpath\n\n    def resolve(self):\n        try:\n            check_recursive_parse_errors(self.definition)\n            return self.context.resolve(self.definition)\n        except (ParseError, ContextError) as exc:\n            format_and_raise(exc, f\"'{self.where}.{self.name}'\", self.relpath)\n\n\nclass ArtifactDefinition(TopDefinition):\n    def resolve(self) -> dict[str, Optional[dict[str, Any]]]:\n        try:\n            check_expression(self.name)\n            name = self.context.resolve(self.name)\n            if not isinstance(name, str):\n                typ = type(name).__name__\n                raise ResolveError(\n                    f\"failed to resolve 
'{self.where}.{self.name}'\"\n                    f\" in '{self.relpath}': expected str, got \" + typ\n                )\n        except (ParseError, ContextError) as exc:\n            format_and_raise(exc, f\"'{self.where}.{self.name}'\", self.relpath)\n        return {name: super().resolve()}\n"
  },
  {
    "path": "dvc/parsing/context.py",
    "content": "from abc import ABC, abstractmethod\nfrom collections import defaultdict\nfrom collections.abc import Mapping, MutableMapping, MutableSequence, Sequence\nfrom contextlib import contextmanager\nfrom copy import deepcopy\nfrom dataclasses import dataclass, field, replace\nfrom typing import Any, Optional, Union\n\nfrom funcy import identity, lfilter, nullcontext, select\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.parsing.interpolate import (\n    get_expression,\n    get_matches,\n    is_exact_string,\n    normalize_key,\n    recurse,\n    str_interpolate,\n    validate_value,\n)\n\nlogger = logger.getChild(__name__)\nSeqOrMap = Union[Sequence, Mapping]\nDictStr = dict[str, Any]\n\n\nclass ContextError(DvcException):\n    pass\n\n\nclass ReservedKeyError(ContextError):\n    def __init__(self, keys, path=None):\n        from dvc.utils.humanize import join\n\n        self.keys = keys\n        self.path = path\n\n        n = \"key\" + (\"s\" if len(keys) > 1 else \"\")\n        msg = f\"attempted to modify reserved {n} {join(keys)}\"\n        if path:\n            msg += f\" in '{path}'\"\n        super().__init__(msg)\n\n\nclass MergeError(ContextError):\n    def __init__(self, key, new, into):\n        self.key = key\n        to_node = into[key]\n        if not isinstance(to_node, Node) or not isinstance(new, Node):\n            super().__init__(f\"cannot merge '{key}' as it already exists in {into}\")\n            return\n\n        assert isinstance(to_node, Node)\n        assert isinstance(new, Node)\n        preexisting = to_node.meta.source\n        new_src = new.meta.source\n        path = new.meta.path()\n        super().__init__(\n            f\"cannot redefine '{path}' from '{new_src}'\"\n            f\" as it already exists in '{preexisting}'\"\n        )\n\n\nclass ParamsLoadError(ContextError):\n    pass\n\n\nclass KeyNotInContext(ContextError, KeyError):  # noqa: N818\n    def __init__(self, key: str) -> 
None:\n        self.key: str = key\n        super().__init__(f\"Could not find '{key}'\")\n\n    def __str__(self):\n        return self.msg\n\n\nclass VarsAlreadyLoaded(ContextError):  # noqa: N818\n    pass\n\n\ndef _merge(into, update, overwrite):\n    for key, val in update.items():\n        if isinstance(into.get(key), Mapping) and isinstance(val, Mapping):\n            _merge(into[key], val, overwrite)\n        else:\n            if key in into and not overwrite:\n                raise MergeError(key, val, into)\n            into[key] = val\n            assert isinstance(into[key], Node)\n\n\ndef recurse_not_a_node(data: dict):\n    def func(item):\n        assert not isinstance(item, Node)\n\n    return recurse(func)(data)\n\n\n@dataclass\nclass Meta:\n    source: Optional[str] = None\n    dpaths: list[str] = field(default_factory=list)\n    local: bool = True\n\n    @staticmethod\n    def update_path(meta: \"Meta\", path: Union[str, int]):\n        dpaths = [*meta.dpaths, str(path)]\n        return replace(meta, dpaths=dpaths)\n\n    def __str__(self):\n        string = self.source or \"<local>\"\n        string += \":\" + self.path()\n        return string\n\n    def path(self):\n        return \".\".join(self.dpaths)\n\n\ndef _default_meta() -> Meta:\n    return Meta()\n\n\nclass Node:\n    meta: Meta\n\n    def get_sources(self):\n        raise NotImplementedError\n\n    @property\n    @abstractmethod\n    def value(self):\n        pass\n\n\n@dataclass\nclass Value(Node):\n    _value: Any\n    meta: Meta = field(compare=False, default_factory=_default_meta, repr=False)\n\n    def __repr__(self):\n        return repr(self._value)\n\n    def __str__(self) -> str:\n        return str(self._value)\n\n    def get_sources(self):\n        return {self.meta.source: self.meta.path()}\n\n    @property\n    def value(self):\n        return self._value\n\n\nPRIMITIVES = (int, float, str, bytes, bool)\n\n\nclass Container(Node, ABC):  # noqa: PLW1641\n    meta: 
Meta\n    data: Union[list, dict]\n    _key_transform = staticmethod(identity)\n\n    def __init__(self, meta=None) -> None:\n        self.meta = meta or _default_meta()\n\n    def _convert(self, key, value):\n        meta = Meta.update_path(self.meta, key)\n        return self._convert_with_meta(value, meta)\n\n    @staticmethod\n    def _convert_with_meta(value, meta: Optional[Meta] = None):\n        if value is None or isinstance(value, PRIMITIVES):\n            assert meta\n            return Value(value, meta=meta)\n        if isinstance(value, Node):\n            return value\n        if isinstance(value, (list, dict)):\n            assert meta\n            if isinstance(value, dict):\n                return CtxDict(value, meta=meta)\n            return CtxList(value, meta=meta)\n        msg = f\"Unsupported value of type '{type(value).__name__}' in '{meta}'\"\n        raise TypeError(msg)\n\n    def __repr__(self):\n        return repr(self.data)\n\n    def __getitem__(self, key):\n        return self.data[key]\n\n    def __setitem__(self, key, value):\n        self.data[key] = self._convert(key, value)\n\n    def __delitem__(self, key):\n        del self.data[key]\n\n    def __len__(self):\n        return len(self.data)\n\n    def __iter__(self):\n        return iter(self.data)\n\n    def __eq__(self, o):\n        container = type(self)\n        if isinstance(o, container):\n            return o.data == self.data\n        return container(o) == self\n\n    def select(self, key: str):\n        index, *rems = key.split(sep=\".\", maxsplit=1)\n        index = index.strip()\n        index = self._key_transform(index)\n        try:\n            d = self[index]\n        except LookupError as exc:\n            raise ValueError(f\"Could not find '{index}' in {self.data}\") from exc\n\n        if not rems:\n            return d\n\n        rem = rems[0]\n        if not isinstance(d, Container):\n            raise ValueError(  # noqa: TRY004\n                
f\"{index} is a primitive value, cannot get '{rem}'\"\n            )\n        return d.select(rem)\n\n    def get_sources(self):\n        return {}\n\n\nclass CtxList(Container, MutableSequence):\n    _key_transform = staticmethod(int)\n\n    def __init__(self, values: Sequence, meta: Optional[Meta] = None):\n        super().__init__(meta=meta)\n        self.data: list = []\n        self.extend(values)\n\n    def insert(self, index: int, value):\n        self.data.insert(index, self._convert(index, value))\n\n    def get_sources(self):\n        return {self.meta.source: self.meta.path()}\n\n    @property\n    def value(self):\n        return [node.value for node in self]\n\n    def __deepcopy__(self, _):\n        # optimization: we don't support overriding a list\n        new = CtxList([])\n        new.data = self.data[:]  # Short-circuiting __setitem__\n        return new\n\n\nclass CtxDict(Container, MutableMapping):\n    def __init__(\n        self,\n        mapping: Optional[Mapping] = None,\n        meta: Optional[Meta] = None,\n        **kwargs,\n    ):\n        super().__init__(meta=meta)\n\n        self.data: dict = {}\n        if mapping:\n            self.update(mapping)\n        self.update(kwargs)\n\n    def __setitem__(self, key, value):\n        if not isinstance(key, str):\n            # limitation for the interpolation\n            # ignore other kinds of keys\n            return None\n        return super().__setitem__(key, value)\n\n    def merge_update(self, other, overwrite=False):\n        _merge(self, other, overwrite=overwrite)\n\n    @property\n    def value(self):\n        return {key: node.value for key, node in self.items()}\n\n    def __deepcopy__(self, _):\n        new = CtxDict()\n        for k, v in self.items():\n            new.data[k] = (\n                deepcopy(v) if isinstance(v, Container) else v\n            )  # short-circuiting __setitem__\n        return new\n\n\nclass Context(CtxDict):\n    def __init__(self, *args, 
**kwargs):\n        \"\"\"\n        Top level mutable dict, with some helpers to create context and track\n        \"\"\"\n        super().__init__(*args, **kwargs)\n        self._track = False\n        self._tracked_data: dict[str, dict] = defaultdict(dict)\n        self.imports = {}\n        self._reserved_keys = {}\n\n    @contextmanager\n    def track(self):\n        self._track = True\n        yield self._tracked_data\n\n        self._track = False\n        self._tracked_data = defaultdict(dict)\n\n    def _track_data(self, node):\n        if not self._track or not isinstance(node, Node):\n            return\n\n        assert isinstance(node, Node)\n        if node.meta and node.meta.local:\n            return\n\n        for source, keys in node.get_sources().items():\n            if not source:\n                continue\n            params_file = self._tracked_data[source]\n            keys = [keys] if isinstance(keys, str) else keys\n            params_file.update(dict.fromkeys(keys, node.value))\n\n    def select(self, key: str, unwrap: bool = False):\n        \"\"\"Select the item using key, similar to `__getitem__`\n           but can track the usage of the data on interpolation\n           as well and can get from nested data structure by using\n           \".\" separated key (eg: \"key1.key2.key3\")\n\n        Args:\n            key: key to select value from\n            unwrap: Convert CtxList/CtxDict/Value items to it's original data\n                    Defaults to False. 
Note that the default is different from\n                    `resolve`.\n        \"\"\"\n        normalized = normalize_key(key)\n        try:\n            node = super().select(normalized)\n        except ValueError as exc:\n            raise KeyNotInContext(key) from exc\n\n        assert isinstance(node, Node)\n        self._track_data(node)\n        return node.value if unwrap else node\n\n    @classmethod\n    def load_from(\n        cls, fs, path: str, select_keys: Optional[list[str]] = None\n    ) -> \"Context\":\n        from dvc.utils.serialize import load_path\n\n        if not fs.exists(path):\n            raise ParamsLoadError(f\"'{path}' does not exist\")\n        if fs.isdir(path):\n            raise ParamsLoadError(f\"'{path}' is a directory\")\n\n        data = load_path(path, fs)\n        if not isinstance(data, Mapping):\n            typ = type(data).__name__\n            raise ParamsLoadError(\n                f\"expected a dictionary, got '{typ}' in file '{path}'\"\n            )\n\n        if select_keys:\n            try:\n                data = {key: data[key] for key in select_keys}\n            except KeyError as exc:\n                key, *_ = exc.args\n                raise ParamsLoadError(f\"could not find '{key}' in '{path}'\") from exc\n\n        meta = Meta(source=path, local=False)\n        ctx = cls(data, meta=meta)\n        ctx.imports[path] = select_keys\n        return ctx\n\n    def merge_update(self, other: \"Context\", overwrite=False):\n        matches = select(lambda key: key in other, self._reserved_keys.keys())\n        if matches:\n            raise ReservedKeyError(matches)\n        return super().merge_update(other, overwrite=overwrite)\n\n    def merge_from(self, fs, item: str, wdir: str, overwrite=False):\n        path, _, keys_str = item.partition(\":\")\n        path = fs.normpath(fs.join(wdir, path))\n\n        select_keys = lfilter(bool, keys_str.split(\",\")) if keys_str else None\n        if path in 
self.imports:\n            if not select_keys and self.imports[path] is None:\n                return  # allow specifying complete filepath multiple times\n            self.check_loaded(path, item, select_keys)\n\n        ctx = Context.load_from(fs, path, select_keys)\n\n        try:\n            self.merge_update(ctx, overwrite=overwrite)\n        except ReservedKeyError as exc:\n            raise ReservedKeyError(exc.keys, item) from exc\n\n        cp = ctx.imports[path]\n        if path not in self.imports:\n            self.imports[path] = cp\n        elif cp:\n            self.imports[path].extend(cp)\n\n    def check_loaded(self, path, item, keys):\n        imported = self.imports[path]\n        if not keys and isinstance(imported, list):\n            raise VarsAlreadyLoaded(\n                f\"cannot load '{item}' as it's partially loaded already\"\n            )\n        if keys and imported is None:\n            raise VarsAlreadyLoaded(\n                f\"cannot partially load '{item}' as it's already loaded.\"\n            )\n        if isinstance(imported, list) and set(keys) & set(imported):\n            raise VarsAlreadyLoaded(\n                f\"cannot load '{item}' as it's partially loaded already\"\n            )\n\n    def load_from_vars(\n        self,\n        fs,\n        vars_: list,\n        wdir: str,\n        stage_name: Optional[str] = None,\n        default: Optional[str] = None,\n    ):\n        if default:\n            to_import = fs.join(wdir, default)\n            if fs.exists(to_import):\n                self.merge_from(fs, default, wdir)\n            else:\n                msg = \"%s does not exist, it won't be used in parametrization\"\n                logger.trace(msg, to_import)\n\n        stage_name = stage_name or \"\"\n        for index, item in enumerate(vars_):\n            assert isinstance(item, (str, dict))\n            if isinstance(item, str):\n                self.merge_from(fs, item, wdir)\n            else:\n       
         joiner = \".\" if stage_name else \"\"\n                meta = Meta(source=f\"{stage_name}{joiner}vars[{index}]\")\n                self.merge_update(Context(item, meta=meta))\n\n    def __deepcopy__(self, _):\n        new = Context(super().__deepcopy__(_))\n        new.meta = deepcopy(self.meta)\n        new.imports = deepcopy(self.imports)\n        new._reserved_keys = deepcopy(self._reserved_keys)\n        return new\n\n    @classmethod\n    def clone(cls, ctx: \"Context\") -> \"Context\":\n        \"\"\"Clones given context.\"\"\"\n        return deepcopy(ctx)\n\n    @contextmanager\n    def reserved(self, *keys: str):\n        \"\"\"Allow reserving some keys so that they cannot be overwritten.\n\n        Ideally, we should delegate this to a separate container\n        and support proper namespacing so that we could support `env` features.\n        But for now, just `item` and `key`, this should do.\n        \"\"\"\n        # using dict to make the error messages ordered\n        new = dict.fromkeys([key for key in keys if key not in self._reserved_keys])\n        self._reserved_keys.update(new)\n        try:\n            yield\n        finally:\n            for key in new:\n                self._reserved_keys.pop(key)\n\n    @contextmanager\n    def set_temporarily(self, to_set: DictStr, reserve: bool = False):\n        cm = self.reserved(*to_set) if reserve else nullcontext()\n\n        non_existing = frozenset(to_set.keys() - self.keys())\n        prev = {key: self[key] for key in to_set if key not in non_existing}\n        temp = CtxDict(to_set)\n        self.update(temp)\n\n        try:\n            with cm:\n                yield\n        finally:\n            self.update(prev)\n            for key in non_existing:\n                self.data.pop(key, None)\n\n    def resolve(\n        self,\n        src,\n        unwrap=True,\n        skip_interpolation_checks=False,\n        key=None,\n        config=None,\n    ) -> Any:\n        
\"\"\"Recursively resolves interpolation and returns resolved data.\n\n        Args:\n            src: Data (str/list/dict etc.) to resolve\n            unwrap: Unwrap CtxDict/CtxList/Value to it's original data if\n                inside `src`. Defaults to True.\n            skip_interpolation_checks: Skip interpolation checks for error\n                The callee is responsible to check for errors in advance.\n\n        >>> c = Context({\"three\": 3})\n        >>> c.resolve({\"lst\": [1, 2, \"${three}\"]})\n        {'lst': [1, 2, 3]}\n        \"\"\"\n        func = recurse(self.resolve_str)\n        return func(src, unwrap, skip_interpolation_checks, key, config)\n\n    def resolve_str(\n        self,\n        src: str,\n        unwrap=True,\n        skip_interpolation_checks=False,\n        key=None,\n        config=None,\n    ) -> str:\n        \"\"\"Resolves interpolated string to it's original value,\n        or in case of multiple interpolations, a combined string.\n\n        >>> c = Context({\"enabled\": True})\n        >>> c.resolve_str(\"${enabled}\")\n        True\n        >>> c.resolve_str(\"enabled? ${enabled}\")\n        'enabled? true'\n        \"\"\"\n        matches = get_matches(src)\n        if is_exact_string(src, matches):\n            # replace \"${enabled}\", if `enabled` is a boolean, with it's actual\n            # value rather than it's string counterparts.\n            expr = get_expression(matches[0], skip_checks=skip_interpolation_checks)\n            value = self.select(expr, unwrap=unwrap)\n            validate_value(value, key)\n            return value\n        # but not \"${num} days\"\n        return str_interpolate(\n            src,\n            matches,\n            self,\n            skip_checks=skip_interpolation_checks,\n            key=key,\n            config=config,\n        )\n\n\nif __name__ == \"__main__\":\n    import doctest\n\n    doctest.testmod()\n"
  },
  {
    "path": "dvc/parsing/interpolate.py",
    "content": "import os\nimport re\nimport typing\nfrom collections.abc import Iterable, Mapping\nfrom functools import singledispatch\n\nfrom funcy import memoize, rpartial\n\nfrom dvc.exceptions import DvcException\nfrom dvc.utils.flatten import flatten\n\nif typing.TYPE_CHECKING:\n    from re import Match\n    from typing import NoReturn\n\n    from pyparsing import ParseException\n\n    from .context import Context\n\nBRACE_OPEN = \"${\"\nBRACE_CLOSE = \"}\"\nLBRACK = \"[\"\nRBRACK = \"]\"\nPERIOD = \".\"\nKEYCRE = re.compile(\n    r\"\"\"\n    (?<!\\\\)                            # escape \\${}\n    \\${                                # starts with ${\n    (?P<inner>.*?)                     # match every char inside\n    }                                  # end with {\n\"\"\",\n    re.VERBOSE,\n)\n\n\n@memoize\ndef get_parser():\n    from pyparsing import CharsNotIn, ParserElement, Suppress, ZeroOrMore\n\n    ParserElement.enable_packrat()\n\n    word = CharsNotIn(f\"{PERIOD}{LBRACK}{RBRACK}\")\n    idx = Suppress(LBRACK) + word + Suppress(RBRACK)\n    attr = Suppress(PERIOD) + word\n    parser = word + ZeroOrMore(attr ^ idx)\n    parser.set_parse_action(PERIOD.join)\n\n    return parser\n\n\nclass ParseError(DvcException):\n    pass\n\n\ndef get_matches(template: str):\n    return list(KEYCRE.finditer(template))\n\n\ndef is_interpolated_string(val):\n    return isinstance(val, str) and bool(get_matches(val))\n\n\ndef normalize_key(key: str):\n    return key.replace(LBRACK, PERIOD).replace(RBRACK, \"\")\n\n\ndef format_and_raise_parse_error(exc) -> \"NoReturn\":\n    raise ParseError(_format_exc_msg(exc))\n\n\ndef embrace(s: str):\n    return BRACE_OPEN + s + BRACE_CLOSE\n\n\ndef escape_str(value):\n    if os.name == \"nt\":\n        from subprocess import list2cmdline\n\n        return list2cmdline([value])\n    from shlex import quote\n\n    return quote(value)\n\n\n@singledispatch\ndef to_str(obj, config=None) -> str:  # noqa: ARG001\n    return 
str(obj)\n\n\n@to_str.register(bool)\ndef _(obj: bool, config=None):  # noqa: ARG001\n    return \"true\" if obj else \"false\"\n\n\n@to_str.register(dict)\ndef _(obj: dict, config=None):  # noqa: C901\n    config = config or {}\n\n    result = \"\"\n    for k, v in flatten(obj).items():\n        if isinstance(v, bool):\n            if v:\n                result += f\"--{k} \"\n            elif config.get(\"bool\", \"store_true\") == \"boolean_optional\":\n                result += f\"--no-{k} \"\n\n        elif isinstance(v, str):\n            result += f\"--{k} {escape_str(v)} \"\n\n        elif isinstance(v, Iterable):\n            for n, i in enumerate(v):\n                if isinstance(i, str):\n                    i = escape_str(i)\n                elif isinstance(i, Iterable):\n                    raise ParseError(f\"Cannot interpolate nested iterable in '{k}'\")\n\n                if config.get(\"list\", \"nargs\") == \"append\":\n                    result += f\"--{k} {i} \"\n                else:\n                    result += f\"{i} \" if n > 0 else f\"--{k} {i} \"\n\n        else:\n            result += f\"--{k} {v} \"\n\n    return result.rstrip()\n\n\ndef _format_exc_msg(exc: \"ParseException\"):\n    from pyparsing import ParseException\n\n    from dvc.utils import colorize\n\n    exc.loc += 2  # 2 because we append `${` at the start of expr below\n\n    expr = exc.pstr\n    exc.pstr = embrace(exc.pstr)\n    error = ParseException.explain(exc, depth=0)\n\n    _, pointer, *explains = error.splitlines()\n    pstr = \"{brace_open}{expr}{brace_close}\".format(\n        brace_open=colorize(BRACE_OPEN, color=\"blue\"),\n        expr=colorize(expr, color=\"magenta\"),\n        brace_close=colorize(BRACE_CLOSE, color=\"blue\"),\n    )\n    msg = \"\\n\".join(explains)\n    pointer = colorize(pointer, color=\"red\")\n    return \"\\n\".join([pstr, pointer, colorize(msg, color=\"red\", style=\"bold\")])\n\n\ndef recurse(f):\n    seq = (list, tuple, set)\n\n    
def wrapper(data, *args):\n        g = rpartial(wrapper, *args)\n        if isinstance(data, Mapping):\n            return {g(k): g(v) for k, v in data.items()}\n        if isinstance(data, seq):\n            return type(data)(map(g, data))\n        if isinstance(data, str):\n            return f(data, *args)\n        return data\n\n    return wrapper\n\n\ndef check_recursive_parse_errors(data):\n    func = recurse(check_expression)\n    return func(data)\n\n\ndef check_expression(s: str):\n    matches = get_matches(s)\n    for match in matches:\n        get_expression(match)\n\n\ndef parse_expr(s: str):\n    from pyparsing import ParseException\n\n    try:\n        result = get_parser().parse_string(s, parse_all=True)\n    except ParseException as exc:\n        format_and_raise_parse_error(exc)\n        raise AssertionError(\"unreachable\")  # noqa: B904\n\n    joined = result.asList()\n    assert len(joined) == 1\n    return joined[0]\n\n\ndef get_expression(match: \"Match\", skip_checks: bool = False):\n    inner = match[\"inner\"]\n    return inner if skip_checks else parse_expr(inner)\n\n\ndef validate_value(value, key):\n    from .context import PRIMITIVES\n\n    not_primitive = value is not None and not isinstance(value, PRIMITIVES)\n    not_foreach = key is not None and \"foreach\" not in key\n    if not_primitive and not_foreach:\n        if isinstance(value, dict) and key == \"cmd\":\n            return True\n        raise ParseError(f\"Cannot interpolate data of type '{type(value).__name__}'\")\n    return None\n\n\ndef str_interpolate(\n    template: str,\n    matches: \"list[Match]\",\n    context: \"Context\",\n    skip_checks: bool = False,\n    key=None,\n    config=None,\n):\n    index, buf = 0, \"\"\n    for match in matches:\n        start, end = match.span(0)\n        expr = get_expression(match, skip_checks=skip_checks)\n        value = context.select(expr, unwrap=True)\n        validate_value(value, key)\n        buf += template[index:start] + 
to_str(value, config=config)\n        index = end\n    buf += template[index:]\n    # regex already backtracks and avoids any `${` starting with\n    # backslashes(`\\`). We just need to replace those by `${`.\n    return buf.replace(r\"\\${\", BRACE_OPEN)\n\n\ndef is_exact_string(src: str, matches: \"list[Match]\"):\n    return len(matches) == 1 and src == matches[0].group(0)\n"
  },
  {
    "path": "dvc/pathspec_math.py",
    "content": "# Path Specification Pattern Math\n# Including changing base dir of path specification patterns and merging\n# of two path specification patterns with different base\n# All the operations follow the documents of `gitignore`\nfrom typing import NamedTuple\n\nfrom pathspec.util import normalize_file\n\nfrom dvc.utils import relpath\n\n\nclass PatternInfo(NamedTuple):\n    patterns: str\n    file_info: str\n\n    def __str__(self) -> str:\n        return self.file_info or f\":{self.patterns}\"\n\n\ndef _not_ignore(rule):\n    return (True, rule[1:]) if rule.startswith(\"!\") else (False, rule)\n\n\ndef _is_comment(rule):\n    return rule.startswith(\"#\")\n\n\ndef _remove_slash(rule):\n    if rule.startswith(\"\\\\\"):\n        return rule[1:]\n    return rule\n\n\ndef _match_all_level(rule):\n    if rule[:-1].find(\"/\") >= 0 and not rule.startswith(\"**/\"):\n        rule = rule.removeprefix(\"/\")\n        return False, rule\n    rule = rule.removeprefix(\"**/\")\n    return True, rule\n\n\ndef change_rule(rule, rel):\n    rule = rule.strip()\n    if _is_comment(rule):\n        return rule\n    not_ignore, rule = _not_ignore(rule)\n    match_all, rule = _match_all_level(rule)\n    rule = _remove_slash(rule)\n    if not match_all:\n        rule = f\"/{rule}\"\n    else:\n        rule = f\"/**/{rule}\"\n    if not_ignore:\n        rule = f\"!/{rel}{rule}\"\n    else:\n        rule = f\"/{rel}{rule}\"\n    return normalize_file(rule)\n\n\ndef _change_dirname(dirname, pattern_list, new_dirname):\n    if new_dirname == dirname:\n        return pattern_list\n    rel = relpath(dirname, new_dirname)\n    if rel.startswith(\"..\"):\n        raise ValueError(\"change dirname can only change to parent path\")\n\n    return [\n        PatternInfo(change_rule(rule.patterns, rel), rule.file_info)\n        for rule in pattern_list\n    ]\n\n\ndef merge_patterns(flavour, pattern_a, prefix_a, pattern_b, prefix_b):\n    \"\"\"\n    Merge two path specification 
patterns.\n\n    This implementation merge two path specification patterns on different\n    bases. It returns the longest common parent directory, and the patterns\n    based on this new base directory.\n    \"\"\"\n    if not pattern_a:\n        return pattern_b, prefix_b\n    if not pattern_b:\n        return pattern_a, prefix_a\n\n    longest_common_dir = flavour.commonpath([prefix_a, prefix_b])\n    new_pattern_a = _change_dirname(prefix_a, pattern_a, longest_common_dir)\n    new_pattern_b = _change_dirname(prefix_b, pattern_b, longest_common_dir)\n\n    if len(prefix_a) <= len(prefix_b):\n        merged_pattern = new_pattern_a + new_pattern_b\n    else:\n        merged_pattern = new_pattern_b + new_pattern_a\n\n    return merged_pattern, longest_common_dir\n"
  },
  {
    "path": "dvc/progress.py",
    "content": "\"\"\"Manages progress bars for DVC repo.\"\"\"\n\nimport logging\nimport sys\nfrom threading import RLock\nfrom typing import TYPE_CHECKING, Any, ClassVar\n\nfrom tqdm import tqdm\n\nfrom dvc.env import DVC_IGNORE_ISATTY\nfrom dvc.utils import env2bool\n\nif TYPE_CHECKING:\n    from dvc.fs.callbacks import TqdmCallback\n\nlogger = logging.getLogger(__name__)\ntqdm.set_lock(RLock())\n\n\nclass Tqdm(tqdm):\n    \"\"\"\n    maximum-compatibility tqdm-based progressbars\n    \"\"\"\n\n    BAR_FMT_DEFAULT = (\n        \"{percentage:3.0f}% {desc}|{bar}|\"\n        \"{postfix[info]}{n_fmt}/{total_fmt}\"\n        \" [{elapsed}<{remaining}, {rate_fmt:>11}]\"\n    )\n    # nested bars should have fixed bar widths to align nicely\n    BAR_FMT_DEFAULT_NESTED = (\n        \"{percentage:3.0f}%|{bar:10}|{desc:{ncols_desc}.{ncols_desc}}\"\n        \"{postfix[info]}{n_fmt}/{total_fmt}\"\n        \" [{elapsed}<{remaining}, {rate_fmt:>11}]\"\n    )\n    BAR_FMT_NOTOTAL = \"{desc}{bar:b}|{postfix[info]}{n_fmt} [{elapsed}, {rate_fmt:>11}]\"\n    BYTES_DEFAULTS: ClassVar[dict[str, Any]] = {\n        \"unit\": \"B\",\n        \"unit_scale\": True,\n        \"unit_divisor\": 1024,\n        \"miniters\": 1,\n    }\n\n    def __init__(\n        self,\n        iterable=None,\n        disable=None,\n        level=logging.ERROR,\n        desc=None,\n        leave=False,\n        bar_format=None,\n        bytes=False,  # noqa: A002\n        file=None,\n        total=None,\n        postfix=None,\n        **kwargs,\n    ):\n        \"\"\"\n        bytes   : shortcut for\n            `unit='B', unit_scale=True, unit_divisor=1024, miniters=1`\n        desc  : persists after `close()`\n        level  : effective logging level for determining `disable`;\n            used only if `disable` is unspecified\n        disable  : If (default: None) or False,\n            will be determined by logging level.\n            May be overridden to `True` due to non-TTY status.\n            Skip 
override by specifying env var `DVC_IGNORE_ISATTY`.\n        kwargs  : anything accepted by `tqdm.tqdm()`\n        \"\"\"\n        kwargs = kwargs.copy()\n        if bytes:\n            kwargs = self.BYTES_DEFAULTS | kwargs\n        else:\n            kwargs.setdefault(\"unit_scale\", total > 999 if total else True)\n        if file is None:\n            file = sys.stderr\n        # auto-disable based on `logger.level`\n        if not disable:\n            disable = logger.getEffectiveLevel() > level\n        # auto-disable based on TTY\n        if not disable and not env2bool(DVC_IGNORE_ISATTY) and hasattr(file, \"isatty\"):\n            disable = not file.isatty()\n        super().__init__(\n            iterable=iterable,\n            disable=disable,\n            leave=leave,\n            desc=desc,\n            bar_format=\"!\",\n            lock_args=(False,),\n            total=total,\n            **kwargs,\n        )\n        self.postfix = postfix or {\"info\": \"\"}\n        if bar_format is None:\n            if self.__len__():\n                self.bar_format = (\n                    self.BAR_FMT_DEFAULT_NESTED if self.pos else self.BAR_FMT_DEFAULT\n                )\n            else:\n                self.bar_format = self.BAR_FMT_NOTOTAL\n        else:\n            self.bar_format = bar_format\n        self.refresh()\n\n    def update_msg(self, msg: str, n: int = 1) -> None:\n        \"\"\"\n        Sets `msg` as a postfix and calls `update(n)`.\n        \"\"\"\n        self.set_msg(msg)\n        self.update(n)\n\n    def set_msg(self, msg: str) -> None:\n        self.postfix[\"info\"] = f\" {msg} |\"\n\n    def update_to(self, current, total=None):\n        if total:\n            self.total = total\n        self.update(current - self.n)\n\n    def wrap_fn(self, fn, callback=None):\n        \"\"\"\n        Returns a wrapped `fn` which calls `callback()` on each call.\n        `callback` is `self.update` by default.\n        \"\"\"\n        if callback 
is None:\n            callback = self.update\n\n        def wrapped(*args, **kwargs):\n            res = fn(*args, **kwargs)\n            callback()\n            return res\n\n        return wrapped\n\n    def close(self):\n        self.postfix[\"info\"] = \"\"\n        # remove ETA (either unknown or zero); remove completed bar\n        self.bar_format = self.bar_format.replace(\"<{remaining}\", \"\").replace(\n            \"|{bar:10}|\", \" \"\n        )\n        super().close()\n\n    @property\n    def format_dict(self):\n        \"\"\"inject `ncols_desc` to fill the display width (`ncols`)\"\"\"\n        d = super().format_dict\n        ncols: int = d[\"ncols\"] or 80\n        # assumes `bar_format` has max one of (\"ncols_desc\" & \"ncols_info\")\n        ncols_left = (\n            ncols\n            - len(\n                self.format_meter(  # type: ignore[call-arg]\n                    ncols_desc=1, ncols_info=1, **d\n                )\n            )\n            + 1\n        )\n        ncols_left = max(ncols_left, 0)\n        if ncols_left:\n            d[\"ncols_desc\"] = d[\"ncols_info\"] = ncols_left\n        else:\n            # work-around for zero-width description\n            d[\"ncols_desc\"] = d[\"ncols_info\"] = 1\n            d[\"prefix\"] = \"\"\n        return d\n\n    def as_callback(self) -> \"TqdmCallback\":\n        from dvc.fs.callbacks import TqdmCallback\n\n        return TqdmCallback(progress_bar=self)\n"
  },
  {
    "path": "dvc/prompt.py",
    "content": "\"\"\"Manages user prompts.\"\"\"\n\nfrom collections.abc import Collection\nfrom getpass import getpass\nfrom typing import Optional\n\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\ndef ask(prompt: str, limited_to: Optional[Collection[str]] = None):\n    from dvc.ui import Console\n\n    if not Console.isatty():\n        return None\n\n    while True:\n        try:\n            answer = input(prompt + \" \").lower()\n        except EOFError:\n            return None\n\n        if not limited_to:\n            return answer\n\n        if answer in limited_to:\n            return answer\n\n        logger.info(\"Your response must be one of: %s. Please try again.\", limited_to)\n\n\ndef confirm(statement: str) -> bool:\n    \"\"\"Ask the user for confirmation about the specified statement.\n\n    Args:\n        statement (unicode): statement to ask the user confirmation about.\n\n    Returns:\n        bool: whether or not specified statement was confirmed.\n    \"\"\"\n    prompt = f\"{statement} [y/n]\"\n    answer = ask(prompt, limited_to=[\"yes\", \"no\", \"y\", \"n\"])\n    return answer and answer.startswith(\"y\")\n\n\ndef password(statement: str) -> str:\n    \"\"\"Ask the user for a password.\n\n    Args:\n        statement (str): string to prompt the user with.\n\n    Returns:\n        str: password entered by the user.\n    \"\"\"\n    logger.info(\"%s: \", statement)\n    return getpass(\"\")\n"
  },
  {
    "path": "dvc/render/__init__.py",
    "content": "INDEX = \"step\"\nREVISION = \"rev\"\nFILENAME = \"filename\"\nFIELD = \"field\"\nREVISIONS = \"revisions\"\nANCHOR_DEFINITIONS = \"anchor_definitions\"\nTYPE_KEY = \"type\"\nSRC = \"src\"\n"
  },
  {
    "path": "dvc/render/convert.py",
    "content": "from typing import Union\n\nfrom dvc.render import REVISION, REVISIONS, SRC, TYPE_KEY\nfrom dvc.render.converter.image import ImageConverter\nfrom dvc.render.converter.vega import VegaConverter\n\n\ndef _get_converter(\n    renderer_class, renderer_id, props, data\n) -> Union[VegaConverter, ImageConverter]:\n    from dvc_render import ImageRenderer, VegaRenderer\n\n    if renderer_class.TYPE == VegaRenderer.TYPE:\n        return VegaConverter(renderer_id, data, props)\n    if renderer_class.TYPE == ImageRenderer.TYPE:\n        return ImageConverter(renderer_id, data, props)\n\n    raise ValueError(f\"Invalid renderer class {renderer_class}\")\n\n\ndef to_json(renderer, split: bool = False) -> list[dict]:\n    if renderer.TYPE == \"vega\":\n        if not renderer.datapoints:\n            return []\n        revs = renderer.get_revs()\n        if split:\n            content, split_content = renderer.get_partial_filled_template()\n        else:\n            content = renderer.get_filled_template()\n            split_content = {}\n\n        return [\n            {\n                TYPE_KEY: renderer.TYPE,\n                REVISIONS: revs,\n                \"content\": content,\n                **split_content,\n            }\n        ]\n    if renderer.TYPE == \"image\":\n        return [\n            {\n                TYPE_KEY: renderer.TYPE,\n                REVISIONS: [datapoint.get(REVISION)],\n                \"url\": datapoint.get(SRC),\n            }\n            for datapoint in renderer.datapoints\n        ]\n    raise ValueError(f\"Invalid renderer: {renderer.TYPE}\")\n"
  },
  {
    "path": "dvc/render/converter/__init__.py",
    "content": "from typing import Any, Optional\n\n\nclass Converter:\n    def __init__(\n        self,\n        plot_id: str,\n        data: Optional[dict[str, Any]] = None,\n        properties: Optional[dict] = None,\n    ):\n        self.plot_id = plot_id\n        self.properties = properties or {}\n        self.data = data or {}\n\n    def convert(self) -> tuple[list[tuple[str, str, Any]], dict]:\n        raise NotImplementedError\n\n    def flat_datapoints(self, revision: str) -> tuple[list[dict], dict]:\n        raise NotImplementedError\n"
  },
  {
    "path": "dvc/render/converter/image.py",
    "content": "import base64\nimport os\nfrom typing import TYPE_CHECKING, Any\n\nfrom dvc.render import FILENAME, REVISION, SRC\n\nfrom . import Converter\n\nif TYPE_CHECKING:\n    from dvc.types import StrPath\n\n\nclass ImageConverter(Converter):\n    @staticmethod\n    def _write_image(\n        path: \"StrPath\",\n        revision: str,\n        filename: str,\n        image_data: bytes,\n    ) -> \"StrPath\":\n        img_path = os.path.join(\n            path,\n            f\"{revision}_{filename}\".replace(os.sep, \"_\").replace(\"/\", \"_\"),\n        )\n        with open(img_path, \"wb\") as fd:\n            fd.write(image_data)\n\n        return img_path\n\n    @staticmethod\n    def _encode_image(image_data: bytes) -> str:\n        base64_str = base64.b64encode(image_data).decode()\n        return f\"data:image;base64,{base64_str}\"\n\n    def convert(self) -> tuple[list[tuple[str, str, Any]], dict]:\n        datas = []\n        for filename, image_data in self.data.items():\n            datas.append((filename, \"\", image_data))\n        return datas, self.properties\n\n    def flat_datapoints(self, revision: str) -> tuple[list[dict], dict]:\n        \"\"\"\n        Convert the DVC Plots content to DVC Render datapoints.\n        Return both generated datapoints and updated properties.\n        \"\"\"\n        path = self.properties.get(\"out\")\n        datapoints = []\n        datas, properties = self.convert()\n        for filename, _, image_data in datas:\n            if path:\n                if not os.path.isdir(path):\n                    os.makedirs(path, exist_ok=True)\n                src = self._write_image(\n                    os.path.abspath(path), revision, filename, image_data\n                )\n            else:\n                src = self._encode_image(image_data)\n            datapoint = {REVISION: revision, FILENAME: filename, SRC: src}\n            datapoints.append(datapoint)\n        return datapoints, properties\n"
  },
  {
    "path": "dvc/render/converter/vega.py",
    "content": "import os\nfrom collections.abc import Iterable\nfrom typing import Any, Optional, Union\n\nfrom funcy import first, last\n\nfrom dvc.exceptions import DvcException\nfrom dvc.render import FIELD, FILENAME, INDEX, REVISION\n\nfrom . import Converter\n\n\nclass FieldNotFoundError(DvcException):\n    def __init__(self, expected_field, found_fields):\n        found_str = \", \".join(found_fields)\n        super().__init__(\n            f\"Could not find provided field ('{expected_field}') \"\n            f\"in data fields ('{found_str}').\"\n        )\n\n\ndef _lists(blob: Union[dict, list]) -> Iterable[list]:\n    if isinstance(blob, list):\n        yield blob\n    else:\n        for value in blob.values():\n            if isinstance(value, dict):\n                yield from _lists(value)\n            elif isinstance(value, list):\n                yield value\n\n\ndef _file_field(*args):\n    for axis_def in args:\n        if axis_def is not None:\n            for file, val in axis_def.items():\n                if isinstance(val, str):\n                    yield file, val\n                elif isinstance(val, list):\n                    for field in val:\n                        yield file, field\n\n\ndef _find(filename: str, field: str, data_series: list[tuple[str, str, Any]]):\n    for data_file, data_field, data in data_series:\n        if data_file == filename and data_field == field:\n            return data_file, data_field, data\n    return None\n\n\ndef _verify_field(file2datapoints: dict[str, list], filename: str, field: str):\n    if filename in file2datapoints:\n        datapoint = first(file2datapoints[filename])\n        if field not in datapoint:\n            raise FieldNotFoundError(field, datapoint.keys())\n\n\ndef _get_xs(properties: dict, file2datapoints: dict[str, list[dict]]):\n    x = properties.get(\"x\")\n    if x is not None and isinstance(x, dict):\n        for filename, field in _file_field(x):\n            
_verify_field(file2datapoints, filename, field)\n            yield filename, field\n\n\ndef _get_ys(properties, file2datapoints: dict[str, list[dict]]):\n    y = properties.get(\"y\", None)\n    if y is not None:\n        for filename, field in _file_field(y):\n            _verify_field(file2datapoints, filename, field)\n            yield filename, field\n\n\ndef _is_datapoints(lst: list[dict]):\n    \"\"\"\n    check if dict keys match, datapoints with different keys mgiht lead\n    to unexpected behavior\n    \"\"\"\n\n    return all(isinstance(item, dict) for item in lst) and set(first(lst).keys()) == {\n        key for keys in lst for key in keys\n    }\n\n\ndef get_datapoints(file_content: dict):\n    result: list[dict[str, Any]] = []\n    for lst in _lists(file_content):\n        if _is_datapoints(lst):\n            for index, datapoint in enumerate(lst):\n                if len(result) <= index:\n                    result.append({})\n                result[index].update(datapoint)\n    return result\n\n\nclass VegaConverter(Converter):\n    \"\"\"\n    Class that takes care of converting unspecified data blob\n    (Dict or List[Dict]) into datapoints (List[Dict]).\n    If some properties that are required by Template class are missing\n    ('x', 'y') it will attempt to fill in the blanks.\n    \"\"\"\n\n    def __init__(\n        self,\n        plot_id: str,\n        data: Optional[dict] = None,\n        properties: Optional[dict] = None,\n    ):\n        super().__init__(plot_id, data, properties)\n        self.plot_id = plot_id\n\n    def _infer_y_from_data(self):\n        if self.plot_id in self.data:\n            for lst in _lists(self.data[self.plot_id]):\n                if all(isinstance(item, dict) for item in lst):\n                    datapoint = first(lst)\n                    field = last(datapoint.keys())\n                    return {self.plot_id: field}\n        return None\n\n    def _infer_x_y(self):\n        x = self.properties.get(\"x\", 
None)\n        y = self.properties.get(\"y\", None)\n\n        inferred_properties: dict = {}\n\n        # Infer x.\n        if isinstance(x, str):\n            inferred_properties[\"x\"] = {}\n            # If multiple y files, duplicate x for each file.\n            if isinstance(y, dict):\n                for file, fields in y.items():\n                    # Duplicate x for each y.\n                    if isinstance(fields, list):\n                        inferred_properties[\"x\"][file] = [x] * len(fields)\n                    else:\n                        inferred_properties[\"x\"][file] = x\n            # Otherwise use plot ID as file.\n            else:\n                inferred_properties[\"x\"][self.plot_id] = x\n\n        # Infer y.\n        if y is None:\n            inferred_properties[\"y\"] = self._infer_y_from_data()\n        # If y files not provided, use plot ID as file.\n        elif not isinstance(y, dict):\n            inferred_properties[\"y\"] = {self.plot_id: y}\n\n        return inferred_properties\n\n    def _find_datapoints(self):\n        result = {}\n        for file, content in self.data.items():\n            result[file] = get_datapoints(content)\n\n        return result\n\n    @staticmethod\n    def infer_y_label(properties):\n        y_label = properties.get(\"y_label\", None)\n        if y_label is not None:\n            return y_label\n        y = properties.get(\"y\", None)\n        if isinstance(y, str):\n            return y\n        if isinstance(y, list):\n            return \"y\"\n        if not isinstance(y, dict):\n            return None\n\n        fields = {field for _, field in _file_field(y)}\n        if len(fields) == 1:\n            return first(fields)\n        return \"y\"\n\n    @staticmethod\n    def infer_x_label(properties):\n        x_label = properties.get(\"x_label\", None)\n        if x_label is not None:\n            return x_label\n\n        x = properties.get(\"x\", None)\n        if not isinstance(x, 
dict):\n            return INDEX\n\n        fields = {field for _, field in _file_field(x)}\n        if len(fields) == 1:\n            return first(fields)\n        return \"x\"\n\n    def flat_datapoints(self, revision):  # noqa: C901, PLR0912\n        file2datapoints, properties = self.convert()\n\n        props_update: dict[str, Union[str, list[dict[str, str]]]] = {}\n\n        xs = list(_get_xs(properties, file2datapoints))\n\n        # assign \"step\" if no x provided\n        if not xs:\n            x_file, x_field = None, INDEX\n        else:\n            x_file, x_field = xs[0]\n\n        num_xs = len(xs)\n        multiple_x_fields = num_xs > 1 and len({x[1] for x in xs}) > 1\n        props_update[\"x\"] = \"dvc_inferred_x_value\" if multiple_x_fields else x_field\n\n        ys = list(_get_ys(properties, file2datapoints))\n\n        num_ys = len(ys)\n        if num_xs > 1 and num_xs != num_ys:\n            raise DvcException(\n                \"Cannot have different number of x and y data sources. 
Found \"\n                f\"{num_xs} x and {num_ys} y data sources.\"\n            )\n\n        all_datapoints = []\n        if ys:\n            _all_y_files, _all_y_fields = list(zip(*ys))\n            all_y_fields = set(_all_y_fields)\n            all_y_files = set(_all_y_files)\n        else:\n            all_y_files = set()\n            all_y_fields = set()\n\n        # override to unified y field name if there are different y fields\n        if len(all_y_fields) > 1:\n            props_update[\"y\"] = \"dvc_inferred_y_value\"\n        else:\n            props_update[\"y\"] = first(all_y_fields)\n\n        # get common prefix to drop from file names\n        if len(all_y_files) > 1:\n            common_prefix_len = len(os.path.commonpath(list(all_y_files)))\n        else:\n            common_prefix_len = 0\n\n        props_update[\"anchors_y_definitions\"] = [\n            {FILENAME: _get_short_y_file(y_file, common_prefix_len), FIELD: y_field}\n            for y_file, y_field in ys\n        ]\n\n        for i, (y_file, y_field) in enumerate(ys):\n            if num_xs > 1:\n                x_file, x_field = xs[i]\n            datapoints = [{**d} for d in file2datapoints.get(y_file, [])]\n\n            if props_update.get(\"y\") == \"dvc_inferred_y_value\":\n                _update_from_field(\n                    datapoints,\n                    field=\"dvc_inferred_y_value\",\n                    source_field=y_field,\n                )\n\n            if x_field == INDEX and x_file is None:\n                _update_from_index(datapoints, INDEX)\n            else:\n                x_datapoints = file2datapoints.get(x_file, [])\n                try:\n                    _update_from_field(\n                        datapoints,\n                        field=\"dvc_inferred_x_value\" if multiple_x_fields else x_field,\n                        source_datapoints=x_datapoints,\n                        source_field=x_field,\n                    )\n                
except IndexError:\n                    raise DvcException(  # noqa: B904\n                        f\"Cannot join '{x_field}' from '{x_file}' and \"\n                        f\"'{y_field}' from '{y_file}'. \"\n                        \"They have to have same length.\"\n                    )\n\n            _update_all(\n                datapoints,\n                update_dict={\n                    REVISION: revision,\n                    FILENAME: _get_short_y_file(y_file, common_prefix_len),\n                    FIELD: y_field,\n                },\n            )\n\n            all_datapoints.extend(datapoints)\n\n        if not all_datapoints:\n            return [], {}\n\n        properties = properties | props_update\n\n        return all_datapoints, properties\n\n    def convert(self):\n        \"\"\"\n        Convert the data. Fill necessary fields ('x', 'y') and return both\n        generated datapoints and updated properties. `x`, `y` values and labels\n        are inferred and always provided.\n        \"\"\"\n        inferred_properties = self._infer_x_y()\n\n        datapoints = self._find_datapoints()\n        properties = self.properties | inferred_properties\n\n        properties[\"y_label\"] = self.infer_y_label(properties)\n        properties[\"x_label\"] = self.infer_x_label(properties)\n\n        return datapoints, properties\n\n\ndef _get_short_y_file(y_file, common_prefix_len):\n    return y_file[common_prefix_len:].strip(\"/\\\\\")\n\n\ndef _update_from_field(\n    target_datapoints: list[dict],\n    field: str,\n    source_datapoints: Optional[list[dict]] = None,\n    source_field: Optional[str] = None,\n):\n    if source_datapoints is None:\n        source_datapoints = target_datapoints\n    if source_field is None:\n        source_field = field\n\n    if len(source_datapoints) != len(target_datapoints):\n        raise IndexError(\"Source and target datapoints must have the same length\")\n\n    for index, datapoint in 
enumerate(target_datapoints):\n        source_datapoint = source_datapoints[index]\n        if source_field in source_datapoint:\n            datapoint[field] = source_datapoint[source_field]\n\n\ndef _update_from_index(datapoints: list[dict], new_field: str):\n    for index, datapoint in enumerate(datapoints):\n        datapoint[new_field] = index\n\n\ndef _update_all(datapoints: list[dict], update_dict: dict):\n    for datapoint in datapoints:\n        datapoint.update(update_dict)\n"
  },
  {
    "path": "dvc/render/match.py",
    "content": "import os\nfrom collections import defaultdict\nfrom typing import TYPE_CHECKING, NamedTuple, Optional\n\nimport dpath.options\nfrom funcy import get_in, last\n\nfrom dvc.log import logger\nfrom dvc.repo.plots import _normpath, infer_data_sources\nfrom dvc.utils.plots import group_definitions_by_id\n\nfrom .convert import _get_converter\n\nif TYPE_CHECKING:\n    from dvc.types import StrPath\n    from dvc_render.base import Renderer\n\n\ndpath.options.ALLOW_EMPTY_STRING_KEYS = True\nlogger = logger.getChild(__name__)\n\n\ndef _squash_plots_properties(data: list) -> dict:\n    configs = [last(group) for group in data]\n    resolved: dict = {}\n    for config in reversed(configs):\n        resolved = resolved | config\n    return resolved\n\n\nclass PlotsData:\n    def __init__(self, data: dict):\n        self.data = data\n\n    def group_definitions(self):\n        groups = defaultdict(list)\n        for rev, rev_content in self.data.items():\n            definitions = rev_content.get(\"definitions\", {}).get(\"data\", {})\n            for plot_id, definition in group_definitions_by_id(definitions).items():\n                groups[plot_id].append((rev, *definition))\n        return dict(groups)\n\n    def get_definition_data(self, target_files, rev):\n        result = {}\n        for definition_file in target_files:\n            if os.name == \"nt\":\n                source_file = _normpath(definition_file).replace(\"\\\\\", \"/\")\n            else:\n                source_file = definition_file\n            file_content = (\n                self.data.get(rev, {})\n                .get(\"sources\", {})\n                .get(\"data\", {})\n                .get(source_file, {})\n                .get(\"data\", {})\n            )\n            if file_content:\n                result[definition_file] = file_content\n        return result\n\n\nclass RendererWithErrors(NamedTuple):\n    renderer: \"Renderer\"\n    source_errors: dict[str, dict[str, 
Exception]]\n    definition_errors: dict[str, Exception]\n\n\ndef match_defs_renderers(  # noqa: C901, PLR0912\n    data,\n    out=None,\n    templates_dir: Optional[\"StrPath\"] = None,\n) -> list[RendererWithErrors]:\n    from dvc_render import ImageRenderer, VegaRenderer\n\n    plots_data = PlotsData(data)\n    renderers = []\n    renderer_cls = None\n\n    for plot_id, group in plots_data.group_definitions().items():\n        plot_datapoints: list[dict] = []\n        props = _squash_plots_properties(group)\n        first_props: dict = {}\n\n        def_errors: dict[str, Exception] = {}\n        src_errors: defaultdict[str, dict[str, Exception]] = defaultdict(dict)\n\n        if out is not None:\n            props[\"out\"] = out\n        if templates_dir is not None:\n            props[\"template_dir\"] = templates_dir\n\n        revs = []\n        for rev, inner_id, plot_definition in group:\n            plot_sources = infer_data_sources(inner_id, plot_definition)\n            definitions_data = plots_data.get_definition_data(plot_sources, rev)\n\n            if ImageRenderer.matches(inner_id, None):\n                renderer_cls = ImageRenderer\n                renderer_id = inner_id\n            else:\n                renderer_cls = VegaRenderer\n                renderer_id = plot_id\n\n            converter = _get_converter(renderer_cls, inner_id, props, definitions_data)\n\n            for src in plot_sources:\n                if error := get_in(data, [rev, \"sources\", \"data\", src, \"error\"]):\n                    src_errors[rev][src] = error\n\n            try:\n                dps, rev_props = converter.flat_datapoints(rev)\n                if dps and rev not in revs:\n                    revs.append(rev)\n            except Exception as e:  # noqa: BLE001\n                logger.warning(\"In %r, %s\", rev, str(e).lower())\n                def_errors[rev] = e\n                continue\n\n            if not first_props and rev_props:\n                
first_props = rev_props\n            plot_datapoints.extend(dps)\n\n        if \"title\" not in first_props:\n            first_props[\"title\"] = renderer_id\n\n        if revs:\n            first_props[\"revs_with_datapoints\"] = revs\n\n        if renderer_cls is not None:\n            renderer = renderer_cls(plot_datapoints, renderer_id, **first_props)\n            renderers.append(RendererWithErrors(renderer, dict(src_errors), def_errors))\n    return renderers\n"
  },
  {
    "path": "dvc/repo/__init__.py",
    "content": "import os\nfrom collections import defaultdict\nfrom collections.abc import Iterable\nfrom contextlib import AbstractContextManager, contextmanager\nfrom functools import wraps\nfrom typing import TYPE_CHECKING, Callable, Optional, Union\n\nfrom dvc.exceptions import (\n    DvcException,\n    NotDvcRepoError,\n    OutputNotFoundError,\n    RevCollectionError,\n)\nfrom dvc.ignore import DvcIgnoreFilter\nfrom dvc.log import logger\nfrom dvc.utils.objects import cached_property\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n    from dvc.fs.data import DataFileSystem\n    from dvc.fs.dvc import DVCFileSystem\n    from dvc.lock import LockBase\n    from dvc.output import Output\n    from dvc.scm import Git, NoSCM\n    from dvc.stage import Stage\n    from dvc.types import DictStrAny\n    from dvc_data.hashfile.state import StateBase\n    from dvc_data.index import DataIndex, DataIndexEntry\n\n    from .experiments import Experiments\n    from .index import Index\n    from .scm_context import SCMContext\n\nlogger = logger.getChild(__name__)\n\n\n@contextmanager\ndef lock_repo(repo: \"Repo\"):\n    depth: int = repo._lock_depth\n    repo._lock_depth += 1\n\n    try:\n        if depth > 0:\n            yield\n        else:\n            with repo.lock:\n                repo._reset()\n                yield\n                # Graph cache is no longer valid after we release the repo.lock\n                repo._reset()\n    finally:\n        repo._lock_depth = depth\n\n\ndef locked(f):\n    @wraps(f)\n    def wrapper(repo, *args, **kwargs):\n        with lock_repo(repo):\n            return f(repo, *args, **kwargs)\n\n    return wrapper\n\n\nclass Repo:\n    DVC_DIR = \".dvc\"\n\n    from dvc.repo.add import add  # type: ignore[misc]\n    from dvc.repo.checkout import checkout  # type: ignore[misc]\n    from dvc.repo.commit import commit  # type: ignore[misc]\n    from dvc.repo.destroy import destroy  # type: ignore[misc]\n    from dvc.repo.diff import 
diff  # type: ignore[misc]\n    from dvc.repo.du import du as _du  # type: ignore[misc]\n    from dvc.repo.fetch import fetch  # type: ignore[misc]\n    from dvc.repo.freeze import freeze, unfreeze  # type: ignore[misc]\n    from dvc.repo.gc import gc  # type: ignore[misc]\n    from dvc.repo.get import get as _get  # type: ignore[misc]\n    from dvc.repo.get_url import get_url as _get_url  # type: ignore[misc]\n    from dvc.repo.imp import imp  # type: ignore[misc]\n    from dvc.repo.imp_db import imp_db  # type: ignore[misc]\n    from dvc.repo.imp_url import imp_url  # type: ignore[misc]\n    from dvc.repo.install import install  # type: ignore[misc]\n    from dvc.repo.ls import ls as _ls  # type: ignore[misc]\n    from dvc.repo.ls_url import ls_url as _ls_url  # type: ignore[misc]\n    from dvc.repo.move import move  # type: ignore[misc]\n    from dvc.repo.pull import pull  # type: ignore[misc]\n    from dvc.repo.push import push  # type: ignore[misc]\n    from dvc.repo.remove import remove  # type: ignore[misc]\n    from dvc.repo.reproduce import reproduce  # type: ignore[misc]\n    from dvc.repo.run import run  # type: ignore[misc]\n    from dvc.repo.status import status  # type: ignore[misc]\n    from dvc.repo.update import update  # type: ignore[misc]\n\n    from .cache import check_missing as cache_check_missing  # type: ignore[misc]\n    from .data import status as data_status  # type: ignore[misc]\n\n    du = staticmethod(_du)\n    ls = staticmethod(_ls)\n    ls_url = staticmethod(_ls_url)\n    get = staticmethod(_get)\n    get_url = staticmethod(_get_url)\n\n    def _get_repo_dirs(\n        self,\n        root_dir: Optional[str] = None,\n        fs: Optional[\"FileSystem\"] = None,\n        uninitialized: bool = False,\n        scm: Optional[Union[\"Git\", \"NoSCM\"]] = None,\n    ) -> tuple[str, Optional[str]]:\n        from dvc.fs import localfs\n        from dvc.scm import SCM, SCMError\n\n        dvc_dir: Optional[str] = None\n        try:\n           
 root_dir = self.find_root(root_dir, fs)\n            fs = fs or localfs\n            dvc_dir = fs.join(root_dir, self.DVC_DIR)\n        except NotDvcRepoError:\n            if not uninitialized:\n                raise\n\n            if not scm:\n                try:\n                    scm = SCM(root_dir or os.curdir)\n                    if scm.dulwich.repo.bare:\n                        raise NotDvcRepoError(f\"{scm.root_dir} is a bare git repo\")\n                except SCMError:\n                    scm = SCM(os.curdir, no_scm=True)\n\n            if not fs or not root_dir:\n                root_dir = scm.root_dir\n\n        assert root_dir\n        return root_dir, dvc_dir\n\n    def __init__(  # noqa: PLR0915, PLR0913\n        self,\n        root_dir: Optional[str] = None,\n        fs: Optional[\"FileSystem\"] = None,\n        rev: Optional[str] = None,\n        subrepos: bool = False,\n        uninitialized: bool = False,\n        config: Optional[\"DictStrAny\"] = None,\n        url: Optional[str] = None,\n        repo_factory: Optional[Callable] = None,\n        scm: Optional[Union[\"Git\", \"NoSCM\"]] = None,\n        remote: Optional[str] = None,\n        remote_config: Optional[\"DictStrAny\"] = None,\n        _wait_for_lock: bool = False,\n    ):\n        from dvc.cachemgr import CacheManager\n        from dvc.data_cloud import DataCloud\n        from dvc.fs import GitFileSystem, LocalFileSystem\n        from dvc.lock import LockNoop, make_lock\n        from dvc.repo.artifacts import Artifacts\n        from dvc.repo.datasets import Datasets\n        from dvc.repo.metrics import Metrics\n        from dvc.repo.params import Params\n        from dvc.repo.plots import Plots\n        from dvc.repo.stage import StageLoad\n        from dvc.scm import SCM\n        from dvc.stage.cache import StageCache\n        from dvc_data.hashfile.state import State, StateNoop\n\n        self.url = url\n        self._fs_conf = {\"repo_factory\": repo_factory}\n        
self._fs = fs or LocalFileSystem()\n        self._scm = scm\n        self._config = config\n        self._remote = remote\n        self._remote_config = remote_config\n        self._data_index: Optional[DataIndex] = None\n        self._wait_for_lock = _wait_for_lock\n\n        if rev and not fs:\n            self._scm = scm = SCM(root_dir or os.curdir)\n            root_dir = \"/\"\n            self._fs = GitFileSystem(scm=self._scm, rev=rev)\n\n        self.root_dir: str\n        self.dvc_dir: Optional[str]\n        (self.root_dir, self.dvc_dir) = self._get_repo_dirs(\n            root_dir=root_dir, fs=self.fs, uninitialized=uninitialized, scm=scm\n        )\n\n        self._uninitialized = uninitialized\n\n        # used by DVCFileSystem to determine if it should traverse subrepos\n        self.subrepos = subrepos\n\n        self.cloud: DataCloud = DataCloud(self)\n        self.stage: StageLoad = StageLoad(self)\n\n        self.lock: LockBase\n        self.cache: CacheManager\n        self.state: StateBase\n        if isinstance(self.fs, GitFileSystem) or not self.dvc_dir:\n            self.lock = LockNoop()\n            self.state = StateNoop()\n            self.cache = CacheManager(self)\n        else:\n            if isinstance(self.fs, LocalFileSystem):\n                assert self.tmp_dir\n                self.fs.makedirs(self.tmp_dir, exist_ok=True)\n\n                self.lock = make_lock(\n                    self.fs.join(self.tmp_dir, \"lock\"),\n                    tmp_dir=self.tmp_dir,\n                    hardlink_lock=self.config[\"core\"].get(\"hardlink_lock\", False),\n                    friendly=True,\n                    wait=self._wait_for_lock,\n                )\n                os.makedirs(self.site_cache_dir, exist_ok=True)\n                if not fs and (\n                    checksum_jobs := self.config[\"core\"].get(\"checksum_jobs\")\n                ):\n                    self.fs.hash_jobs = checksum_jobs\n\n                self.state 
= State(self.root_dir, self.site_cache_dir, self.dvcignore)\n            else:\n                self.lock = LockNoop()\n                self.state = StateNoop()\n\n            self.cache = CacheManager(self)\n\n            self.stage_cache = StageCache(self)\n\n            self._ignore()\n\n        self.metrics: Metrics = Metrics(self)\n        self.plots: Plots = Plots(self)\n        self.params: Params = Params(self)\n        self.artifacts: Artifacts = Artifacts(self)\n        self.datasets: Datasets = Datasets(self)\n\n        self.stage_collection_error_handler: Optional[\n            Callable[[str, Exception], None]\n        ] = None\n        self._lock_depth: int = 0\n\n    def __str__(self):\n        return self.url or self.root_dir\n\n    @cached_property\n    def config(self):\n        from dvc.config import Config\n\n        return Config(\n            self.dvc_dir,\n            local_dvc_dir=self.local_dvc_dir,\n            fs=self.fs,\n            config=self._config,\n            remote=self._remote,\n            remote_config=self._remote_config,\n        )\n\n    @cached_property\n    def local_dvc_dir(self) -> Optional[str]:\n        from dvc.fs import GitFileSystem, LocalFileSystem\n\n        if not self.dvc_dir:\n            return None\n\n        if isinstance(self.fs, LocalFileSystem):\n            return self.dvc_dir\n\n        if not isinstance(self.fs, GitFileSystem):\n            return None\n\n        relparts: tuple[str, ...] 
= ()\n        if self.root_dir != \"/\":\n            # subrepo\n            relparts = self.fs.relparts(self.root_dir, \"/\")\n\n        dvc_dir = os.path.join(self.scm.root_dir, *relparts, self.DVC_DIR)\n        if os.path.exists(dvc_dir):\n            return dvc_dir\n\n        return None\n\n    @cached_property\n    def tmp_dir(self):\n        if self.local_dvc_dir is None:\n            return None\n\n        return os.path.join(self.local_dvc_dir, \"tmp\")\n\n    @cached_property\n    def index(self) -> \"Index\":\n        from dvc.repo.index import Index\n\n        return Index.from_repo(self)\n\n    def check_graph(\n        self, stages: Iterable[\"Stage\"], callback: Optional[Callable] = None\n    ) -> None:\n        if not getattr(self, \"_skip_graph_checks\", False):\n            new = self.index.update(stages)\n            if callable(callback):\n                callback()\n            new.check_graph()\n\n    @staticmethod\n    def open(url: Optional[str], *args, **kwargs) -> \"Repo\":\n        from .open_repo import open_repo\n\n        return open_repo(url, *args, **kwargs)\n\n    @cached_property\n    def scm(self) -> Union[\"Git\", \"NoSCM\"]:\n        from dvc.scm import SCM, SCMError\n\n        if self._scm:\n            return self._scm\n\n        no_scm = self.config[\"core\"].get(\"no_scm\", False)\n        try:\n            return SCM(self.root_dir, no_scm=no_scm)\n        except SCMError:\n            if self._uninitialized:\n                # might not be a git/dvc repo at all\n                # used in `params/metrics/plots` targets\n                return SCM(self.root_dir, no_scm=True)\n            raise\n\n    @cached_property\n    def scm_context(self) -> \"SCMContext\":\n        from dvc.repo.scm_context import SCMContext\n\n        return SCMContext(self.scm, self.config)\n\n    @cached_property\n    def dvcignore(self) -> DvcIgnoreFilter:\n        return DvcIgnoreFilter(self.fs, self.root_dir)\n\n    def get_rev(self):\n        from 
dvc.fs import GitFileSystem, LocalFileSystem\n\n        assert self.scm\n        if isinstance(self.fs, LocalFileSystem):\n            from dvc.scm import map_scm_exception\n\n            with map_scm_exception():\n                return self.scm.get_rev()\n        assert isinstance(self.fs, GitFileSystem)\n        return self.fs.rev\n\n    @cached_property\n    def experiments(self) -> \"Experiments\":\n        from dvc.repo.experiments import Experiments\n\n        return Experiments(self)\n\n    @property\n    def fs(self) -> \"FileSystem\":\n        return self._fs\n\n    @fs.setter\n    def fs(self, fs: \"FileSystem\"):\n        self._fs = fs\n        # Our graph cache is no longer valid, as it was based on the previous\n        # fs.\n        self._reset()\n\n    @property\n    def data_index(self) -> \"DataIndex\":\n        from dvc_data.index import DataIndex\n\n        if self._data_index is None:\n            index_dir = os.path.join(self.site_cache_dir, \"index\", \"data\")\n            os.makedirs(index_dir, exist_ok=True)\n            self._data_index = DataIndex.open(os.path.join(index_dir, \"db.db\"))\n\n        return self._data_index\n\n    def drop_data_index(self) -> None:\n        for key in self.data_index.ls((), detail=False):\n            try:\n                self.data_index.delete_node(key)\n            except KeyError:\n                pass\n        self.data_index.commit()\n        self.data_index.close()\n        self._reset()\n\n    def get_data_index_entry(\n        self,\n        path: str,\n        workspace: str = \"repo\",\n    ) -> tuple[\"DataIndex\", \"DataIndexEntry\"]:\n        if self.subrepos:\n            fs_path = self.dvcfs.from_os_path(path)\n            fs = self.dvcfs.fs\n            key = fs._get_key_from_relative(fs_path)\n            subrepo, _, key = fs._get_subrepo_info(key)\n            index = subrepo.index.data[workspace]\n        else:\n            index = self.index.data[workspace]\n            key = 
self.fs.relparts(path, self.root_dir)\n\n        try:\n            return index, index[key]\n        except KeyError as exc:\n            raise OutputNotFoundError(path, self) from exc\n\n    def __repr__(self):\n        return f\"{self.__class__.__name__}: '{self.root_dir}'\"\n\n    @classmethod\n    def find_root(cls, root=None, fs=None) -> str:\n        from dvc.fs import LocalFileSystem, localfs\n\n        fs = fs or localfs\n        root = root or os.curdir\n        root_dir = fs.abspath(root)\n\n        if not fs.isdir(root_dir):\n            raise NotDvcRepoError(f\"directory '{root}' does not exist\")\n\n        while True:\n            dvc_dir = fs.join(root_dir, cls.DVC_DIR)\n            if fs.isdir(dvc_dir):\n                return root_dir\n            if isinstance(fs, LocalFileSystem) and os.path.ismount(root_dir):\n                break\n            parent = fs.parent(root_dir)\n            if parent == root_dir:\n                break\n            root_dir = parent\n\n        msg = \"you are not inside of a DVC repository\"\n\n        if isinstance(fs, LocalFileSystem):\n            msg = f\"{msg} (checked up to mount point '{root_dir}')\"\n\n        raise NotDvcRepoError(msg)\n\n    @classmethod\n    def find_dvc_dir(cls, root=None, fs=None) -> str:\n        from dvc.fs import localfs\n\n        fs = fs or localfs\n        root_dir = cls.find_root(root, fs=fs)\n        return fs.join(root_dir, cls.DVC_DIR)\n\n    @staticmethod\n    def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False) -> \"Repo\":\n        from dvc.repo.init import init\n\n        return init(root_dir=root_dir, no_scm=no_scm, force=force, subdir=subdir)\n\n    def unprotect(self, target):\n        from dvc.fs.callbacks import TqdmCallback\n\n        with TqdmCallback(desc=f\"Unprotecting {target}\") as callback:\n            return self.cache.repo.unprotect(target, callback=callback)\n\n    def _ignore(self):\n        flist = [self.config.files[\"local\"]]\n        
if tmp_dir := self.tmp_dir:\n            flist.append(tmp_dir)\n\n        if cache_dir := self.cache.default_local_cache_dir:\n            flist.append(cache_dir)\n\n        for file in flist:\n            self.scm_context.ignore(file)\n\n    def brancher(self, *args, **kwargs):\n        from dvc.repo.brancher import brancher\n\n        return brancher(self, *args, **kwargs)\n\n    def switch(self, rev: str) -> AbstractContextManager[str]:\n        from dvc.repo.brancher import switch\n\n        return switch(self, rev)\n\n    def used_objs(  # noqa: PLR0913\n        self,\n        targets=None,\n        all_branches=False,\n        with_deps=False,\n        all_tags=False,\n        all_commits=False,\n        all_experiments=False,\n        commit_date: Optional[str] = None,\n        remote=None,\n        force=False,\n        jobs=None,\n        recursive=False,\n        used_run_cache=None,\n        revs=None,\n        num=1,\n        push: bool = False,\n        skip_failed: bool = False,\n    ):\n        \"\"\"Get the stages related to the given target and collect\n        the `info` of its outputs.\n\n        This is useful to know what files from the cache are _in use_\n        (namely, a file described as an output on a stage).\n\n        The scope is, by default, the working directory, but you can use\n        `all_branches`/`all_tags`/`all_commits`/`all_experiments` to expand\n        the scope.\n\n        Returns:\n            A dict mapping (remote) ODB instances to sets of objects that\n            belong to each ODB. 
If the ODB instance is None, the objects\n            are naive and do not belong to a specific remote ODB.\n        \"\"\"\n        used = defaultdict(set)\n\n        for rev in self.brancher(\n            revs=revs,\n            all_branches=all_branches,\n            all_tags=all_tags,\n            all_commits=all_commits,\n            all_experiments=all_experiments,\n            commit_date=commit_date,\n            num=num,\n        ):\n            try:\n                for odb, objs in self.index.used_objs(\n                    targets,\n                    remote=remote,\n                    force=force,\n                    jobs=jobs,\n                    recursive=recursive,\n                    with_deps=with_deps,\n                    push=push,\n                ).items():\n                    used[odb].update(objs)\n            except DvcException as exc:\n                rev = rev or \"workspace\"\n                if skip_failed:\n                    logger.warning(\"Failed to collect '%s', skipping\", rev)\n                else:\n                    raise RevCollectionError(rev) from exc\n        if used_run_cache:\n            for odb, objs in self.stage_cache.get_used_objs(\n                used_run_cache, remote=remote, force=force, jobs=jobs\n            ).items():\n                used[odb].update(objs)\n\n        return used\n\n    def find_outs_by_path(\n        self, path, outs=None, recursive=False, strict=True\n    ) -> list[\"Output\"]:\n        # using `outs_graph` to ensure graph checks are run\n        outs = outs or self.index.outs_graph\n\n        abs_path = self.fs.abspath(path)\n        fs_path = abs_path\n\n        def func(out):\n            def eq(one, two):\n                return one == two\n\n            match = eq if strict else out.fs.isin_or_eq\n\n            if out.protocol == \"local\" and match(fs_path, out.fs_path):\n                return True\n            return recursive and out.fs.isin(out.fs_path, fs_path)\n\n      
  matched = list(filter(func, outs))\n        if not matched:\n            raise OutputNotFoundError(path, self)\n\n        return matched\n\n    def is_dvc_internal(self, path):\n        path_parts = self.fs.normpath(path).split(self.fs.sep)\n        return self.DVC_DIR in path_parts\n\n    @cached_property\n    def datafs(self) -> \"DataFileSystem\":\n        from dvc.fs.data import DataFileSystem\n\n        return DataFileSystem(index=self.index.data[\"repo\"])\n\n    @cached_property\n    def dvcfs(self) -> \"DVCFileSystem\":\n        from dvc.fs.dvc import DVCFileSystem\n\n        return DVCFileSystem(repo=self, subrepos=self.subrepos, **self._fs_conf)\n\n    @cached_property\n    def _btime(self):\n        if not self.tmp_dir:\n            return None\n\n        # Not all python versions/filesystems/platforms provide creation\n        # time (st_birthtime, stx_btime, etc), so we use our own dummy\n        # file and its mtime instead.\n        path = os.path.join(self.tmp_dir, \"btime\")\n\n        try:\n            with open(path, \"x\"):\n                pass\n        except FileNotFoundError:\n            return None\n        except FileExistsError:\n            pass\n\n        return os.path.getmtime(path)\n\n    @cached_property\n    def site_cache_dir(self) -> str:\n        import getpass\n        import hashlib\n\n        from dvc.dirs import site_cache_dir\n        from dvc.fs import GitFileSystem\n        from dvc.version import version_tuple\n\n        cache_dir = site_cache_dir(self.config[\"core\"].get(\"site_cache_dir\"))\n\n        subdir = None\n        if isinstance(self.fs, GitFileSystem):\n            if self.root_dir != \"/\":\n                # subrepo\n                subdir = self.root_dir\n            root_dir = self.scm.root_dir\n        else:\n            root_dir = self.root_dir\n\n        repos_dir = os.path.join(cache_dir, \"repo\")\n\n        umask = os.umask(0)\n        try:\n            os.makedirs(repos_dir, mode=0o777, 
exist_ok=True)\n        finally:\n            os.umask(umask)\n\n        # NOTE: Some number to change the generated token if none of the\n        # components were changed (useful to prevent newer dvc versions from\n        # using older broken cache). Please reset this back to 0 if other parts\n        # of the token components are changed.\n        salt = 0\n\n        # NOTE: This helps us avoid accidentally reusing cache for repositories\n        # that just happened to be at the same path as old deleted ones.\n        btime = self._btime or getattr(os.stat(root_dir), \"st_birthtime\", None)\n\n        md5 = hashlib.md5(\n            str(\n                (root_dir, subdir, btime, getpass.getuser(), version_tuple[0], salt)\n            ).encode(),\n            usedforsecurity=False,\n        )\n        repo_token = md5.hexdigest()\n        return os.path.join(repos_dir, repo_token)\n\n    def close(self):\n        self.scm.close()\n        self.state.close()\n        if \"dvcfs\" in self.__dict__:\n            self.dvcfs.close()\n        if self._data_index is not None:\n            self._data_index.close()\n\n    def _reset(self):\n        self.scm._reset()\n        self.datasets._reset()\n        self.state.close()\n        if \"dvcfs\" in self.__dict__:\n            self.dvcfs.close()\n        self.__dict__.pop(\"index\", None)\n        self.__dict__.pop(\"dvcignore\", None)\n        self.__dict__.pop(\"dvcfs\", None)\n        self.__dict__.pop(\"datafs\", None)\n        self.__dict__.pop(\"config\", None)\n\n    def __enter__(self):\n        return self\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        self.close()\n"
  },
  {
    "path": "dvc/repo/add.py",
    "content": "import os\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom typing import TYPE_CHECKING, NamedTuple, Optional, Union\n\nfrom dvc.exceptions import (\n    CacheLinkError,\n    DvcException,\n    OutputDuplicationError,\n    OutputNotFoundError,\n    OverlappingOutputPathsError,\n)\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.ui import ui\nfrom dvc.utils import glob_targets, resolve_output, resolve_paths\n\nfrom . import locked\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.stage import Stage\n    from dvc.types import StrOrBytesPath\n\n\nclass StageInfo(NamedTuple):\n    stage: \"Stage\"\n    output_exists: bool\n\n\ndef find_targets(\n    targets: Union[\"StrOrBytesPath\", Iterator[\"StrOrBytesPath\"]], glob: bool = False\n) -> list[str]:\n    if isinstance(targets, (str, bytes, os.PathLike)):\n        targets_list = [os.fsdecode(targets)]\n    else:\n        targets_list = [os.fsdecode(target) for target in targets]\n    return glob_targets(targets_list, glob=glob)\n\n\nPIPELINE_TRACKED_UPDATE_FMT = (\n    \"cannot update {out!r}: overlaps with an output of {stage} in '{path}'.\\n\"\n    \"Run the pipeline or use 'dvc commit' to force update it.\"\n)\n\n\ndef get_or_create_stage(\n    repo: \"Repo\",\n    target: str,\n    out: Optional[str] = None,\n    to_remote: bool = False,\n    force: bool = False,\n) -> StageInfo:\n    if out:\n        target = resolve_output(target, out, force=force)\n    path, wdir, out = resolve_paths(repo, target, always_local=to_remote and not out)\n\n    try:\n        (out_obj,) = repo.find_outs_by_path(target, strict=False)\n        stage = out_obj.stage\n        if not stage.is_data_source:\n            msg = PIPELINE_TRACKED_UPDATE_FMT.format(\n                out=out, stage=stage, path=stage.relpath\n            )\n            raise DvcException(msg)\n        return StageInfo(stage, output_exists=True)\n    except OutputNotFoundError:\n        stage = 
repo.stage.create(\n            single_stage=True,\n            validate=False,\n            fname=path,\n            wdir=wdir,\n            outs=[out],\n            force=force,\n        )\n        return StageInfo(stage, output_exists=False)\n\n\nOVERLAPPING_CHILD_FMT = (\n    \"Cannot add '{out}', because it is overlapping with other \"\n    \"DVC tracked output: '{parent}'.\\n\"\n    \"To include '{out}' in '{parent}', run \"\n    \"'dvc commit {parent_stage}'\"\n)\n\nOVERLAPPING_PARENT_FMT = (\n    \"Cannot add '{parent}', because it is overlapping with other \"\n    \"DVC tracked output: '{out}'.\\n\"\n    \"To include '{out}' in '{parent}', run \"\n    \"'dvc remove {out_stage}' and then 'dvc add {parent}'\"\n)\n\n\n@contextmanager\ndef translate_graph_error(stages: list[\"Stage\"]) -> Iterator[None]:\n    try:\n        yield\n    except OverlappingOutputPathsError as exc:\n        if exc.parent in [o for s in stages for o in s.outs]:\n            msg = OVERLAPPING_PARENT_FMT.format(\n                out=exc.overlapping_out,\n                parent=exc.parent,\n                out_stage=exc.overlapping_out.stage.addressing,\n            )\n        else:\n            msg = OVERLAPPING_CHILD_FMT.format(\n                out=exc.overlapping_out,\n                parent=exc.parent,\n                parent_stage=exc.parent.stage.addressing,\n            )\n        raise OverlappingOutputPathsError(  # noqa: B904\n            exc.parent, exc.overlapping_out, msg\n        )\n    except OutputDuplicationError as exc:\n        raise OutputDuplicationError(  # noqa: B904\n            exc.output, set(exc.stages) - set(stages)\n        )\n\n\ndef progress_iter(stages: dict[str, StageInfo]) -> Iterator[tuple[str, StageInfo]]:\n    total = len(stages)\n    desc = \"Adding...\"\n    with ui.progress(\n        stages.items(), total=total, desc=desc, unit=\"file\", leave=True\n    ) as pbar:\n        if total == 1:\n            pbar.bar_format = desc\n            
pbar.refresh()\n\n        for item, stage_info in pbar:\n            if total > 1:\n                pbar.set_msg(str(stage_info.stage.outs[0]))\n                pbar.refresh()\n            yield item, stage_info\n            if total == 1:  # restore bar format for stats\n                pbar.bar_format = pbar.BAR_FMT_DEFAULT\n\n\nLINK_FAILURE_MESSAGE = (\n    \"\\nSome targets could not be linked from cache to workspace.\\n{}\\n\"\n    \"To re-link these targets, reconfigure cache types and then run:\\n\"\n    \"\\n\\tdvc checkout {}\"\n)\n\n\n@contextmanager\ndef warn_link_failures() -> Iterator[list[str]]:\n    link_failures: list[str] = []\n    try:\n        yield link_failures\n    finally:\n        if link_failures:\n            msg = LINK_FAILURE_MESSAGE.format(\n                CacheLinkError.SUPPORT_LINK,\n                \" \".join(link_failures),\n            )\n            ui.error_write(msg)\n\n\ndef _add_transfer(\n    stage: \"Stage\",\n    source: str,\n    remote: Optional[str] = None,\n    to_remote: bool = False,\n    jobs: Optional[int] = None,\n    force: bool = False,\n) -> None:\n    odb = None\n    if to_remote:\n        odb = stage.repo.cloud.get_remote_odb(remote, \"add\")\n    stage.transfer(source, odb=odb, to_remote=to_remote, jobs=jobs, force=force)\n    stage.dump()\n\n\ndef _add(\n    stage: \"Stage\",\n    source: Optional[str] = None,\n    no_commit: bool = False,\n    relink: bool = True,\n) -> None:\n    out = stage.outs[0]\n    path = out.fs.abspath(source) if source else None\n    try:\n        stage.add_outs(path, no_commit=no_commit, relink=relink)\n    except CacheLinkError:\n        stage.dump()\n        raise\n    stage.dump()\n\n\n@locked\n@scm_context\ndef add(\n    repo: \"Repo\",\n    targets: Union[\"StrOrBytesPath\", Iterator[\"StrOrBytesPath\"]],\n    no_commit: bool = False,\n    glob: bool = False,\n    out: Optional[str] = None,\n    remote: Optional[str] = None,\n    to_remote: bool = False,\n    remote_jobs: 
Optional[int] = None,\n    force: bool = False,\n    relink: bool = True,\n) -> list[\"Stage\"]:\n    add_targets = find_targets(targets, glob=glob)\n    if not add_targets:\n        return []\n\n    stages_with_targets = {\n        target: get_or_create_stage(\n            repo,\n            target,\n            out=out,\n            to_remote=to_remote,\n            force=force,\n        )\n        for target in add_targets\n    }\n\n    stages = [stage for stage, _ in stages_with_targets.values()]\n    msg = \"Collecting stages from the workspace\"\n    with translate_graph_error(stages), ui.status(msg) as st:\n        repo.check_graph(stages=stages, callback=lambda: st.update(\"Checking graph\"))\n\n    if to_remote or out:\n        assert len(stages_with_targets) == 1, \"multiple targets are unsupported\"\n        (source, (stage, _)) = next(iter(stages_with_targets.items()))\n        _add_transfer(stage, source, remote, to_remote, jobs=remote_jobs, force=force)\n        return [stage]\n\n    with warn_link_failures() as link_failures:\n        for source, (stage, output_exists) in progress_iter(stages_with_targets):\n            try:\n                _add(\n                    stage,\n                    source if output_exists else None,\n                    no_commit=no_commit,\n                    relink=relink,\n                )\n            except CacheLinkError:\n                link_failures.append(stage.relpath)\n    return stages\n"
  },
  {
    "path": "dvc/repo/artifacts.py",
    "content": "import os\nimport posixpath\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nfrom dvc.annotations import Artifact\nfrom dvc.dvcfile import PROJECT_FILE\nfrom dvc.exceptions import (\n    ArtifactNotFoundError,\n    DvcException,\n    FileExistsLocallyError,\n    InvalidArgumentError,\n)\nfrom dvc.log import logger\nfrom dvc.utils import as_posix, relpath, resolve_output\nfrom dvc.utils.objects import cached_property\nfrom dvc.utils.serialize import modify_yaml\n\nif TYPE_CHECKING:\n    from gto.tag import Tag as GTOTag\n    from scmrepo.git import GitTag\n\n    from dvc.repo import Repo\n    from dvc.scm import Git\n\nlogger = logger.getChild(__name__)\n\n\ndef check_name_format(name: str) -> None:\n    from gto.constants import assert_name_is_valid\n    from gto.exceptions import ValidationError\n\n    try:\n        assert_name_is_valid(name)\n    except ValidationError as exc:\n        raise InvalidArgumentError(\n            f\"Can't use '{name}' as artifact name (ID).\"\n        ) from exc\n\n\ndef name_is_compatible(name: str) -> bool:\n    \"\"\"\n    Only needed by DVCLive per treeverse/dvclive#715\n    Will be removed in future release.\n    \"\"\"\n    from gto.constants import assert_name_is_valid\n    from gto.exceptions import ValidationError\n\n    try:\n        assert_name_is_valid(name)\n        return True\n    except ValidationError:\n        return False\n\n\ndef check_for_nested_dvc_repo(dvcfile: Path):\n    from dvc.repo import Repo\n\n    if dvcfile.is_absolute():\n        raise InvalidArgumentError(\"Use relative path to dvc.yaml.\")\n    path = dvcfile.parent\n    while path.name:\n        if (path / Repo.DVC_DIR).is_dir():\n            raise InvalidArgumentError(\n                f\"Nested DVC repos like {path} are not supported.\"\n            )\n        path = path.parent\n\n\ndef _reformat_name(name: str) -> str:\n    from gto.constants import SEPARATOR_IN_NAME, fullname_re\n\n    # 
NOTE: DVC accepts names like\n    #   path/to/dvc.yaml:artifact_name\n    # but Studio/GTO tags are generated with\n    #   path/to:artifact_name\n    m = fullname_re.match(name)\n    if m and m.group(\"dirname\"):\n        group = m.group(\"dirname\").rstrip(SEPARATOR_IN_NAME)\n        dirname, basename = posixpath.split(group)\n        if basename == PROJECT_FILE:\n            name = f\"{dirname}{SEPARATOR_IN_NAME}{m.group('name')}\"\n    return name\n\n\nclass Artifacts:\n    def __init__(self, repo: \"Repo\") -> None:\n        self.repo = repo\n\n    @cached_property\n    def scm(self) -> Optional[\"Git\"]:\n        from dvc.scm import Git\n\n        if isinstance(self.repo.scm, Git):\n            return self.repo.scm\n        return None\n\n    def read(self) -> dict[str, dict[str, Artifact]]:\n        \"\"\"Read artifacts from dvc.yaml.\"\"\"\n        artifacts: dict[str, dict[str, Artifact]] = {}\n        for dvcfile, dvcfile_artifacts in self.repo.index._artifacts.items():\n            dvcyaml = self.repo.fs.relpath(dvcfile, self.repo.root_dir)\n            artifacts[dvcyaml] = {}\n            for name, value in dvcfile_artifacts.items():\n                try:\n                    check_name_format(name)\n                except InvalidArgumentError as e:\n                    logger.warning(e.msg)\n                artifacts[dvcyaml][name] = Artifact(**value)\n        return artifacts\n\n    def add(self, name: str, artifact: Artifact, dvcfile: Optional[str] = None):\n        \"\"\"Add artifact to dvc.yaml.\"\"\"\n        with self.repo.scm_context(quiet=True):\n            check_name_format(name)\n            dvcyaml = Path(dvcfile or PROJECT_FILE)\n            check_for_nested_dvc_repo(\n                dvcyaml.relative_to(self.repo.root_dir)\n                if dvcyaml.is_absolute()\n                else dvcyaml\n            )\n\n            with modify_yaml(dvcyaml) as data:\n                artifacts = data.setdefault(\"artifacts\", {})\n                
artifacts.update({name: artifact.to_dict()})\n\n            self.repo.scm_context.track_file(dvcfile)\n\n        return artifacts.get(name)\n\n    def get_rev(\n        self, name: str, version: Optional[str] = None, stage: Optional[str] = None\n    ):\n        \"\"\"Return revision containing the given artifact.\"\"\"\n        from gto.base import sort_versions\n        from gto.tag import find, parse_tag\n\n        assert not (version and stage)\n        name = _reformat_name(name)\n        tags: list[GitTag] = find(name=name, version=version, stage=stage, scm=self.scm)\n        if not tags:\n            raise ArtifactNotFoundError(name, version=version, stage=stage)\n        if version or stage:\n            return tags[-1].target\n        gto_tags: list[GTOTag] = sort_versions(parse_tag(tag) for tag in tags)\n        return gto_tags[0].tag.target\n\n    @classmethod\n    def parse_path(cls, name: str) -> tuple[Optional[str], str]:\n        from gto.constants import SEPARATOR_IN_NAME, fullname_re\n\n        name = _reformat_name(name)\n        m = fullname_re.match(name)\n        if not m:\n            raise ArtifactNotFoundError(name)\n        dirname = m.group(\"dirname\")\n        if dirname:\n            dirname = dirname.rstrip(SEPARATOR_IN_NAME)\n\n        return dirname, m.group(\"name\")\n\n    def get_path(self, name: str):\n        \"\"\"Return fspath for the given artifact relative to the git root.\"\"\"\n        from dvc.fs import GitFileSystem\n\n        dirname, artifact_name = self.parse_path(name)\n        # `name`/`dirname` are expected to be a git root relative.\n        # We convert it to dvc-root relative path so that we can read artifacts\n        # from dvc.yaml file.\n        # But we return dirname intact, as we want to return a git-root relative path.\n        # This is useful when reading from `dvcfs` from remote.\n        fs = self.repo.fs\n        assert self.scm\n        if isinstance(fs, GitFileSystem):\n            scm_root = 
fs.root_marker\n        else:\n            scm_root = self.scm.root_dir\n\n        dirparts = posixpath.normpath(dirname).split(posixpath.sep) if dirname else ()\n        abspath = fs.join(scm_root, *dirparts, PROJECT_FILE)\n        rela = fs.relpath(abspath, self.repo.root_dir)\n        try:\n            artifact = self.read()[rela][artifact_name]\n        except KeyError as exc:\n            raise ArtifactNotFoundError(name) from exc\n\n        path = posixpath.join(dirname or \"\", artifact.path)\n        parts = posixpath.normpath(path).split(posixpath.sep)\n        return os.path.join(*parts)\n\n    def download(\n        self,\n        name: str,\n        version: Optional[str] = None,\n        stage: Optional[str] = None,\n        out: Optional[str] = None,\n        force: bool = False,\n        jobs: Optional[int] = None,\n    ) -> tuple[int, str]:\n        \"\"\"Download the specified artifact.\"\"\"\n        from dvc.fs import download as fs_download\n        from dvc.repo import Repo\n\n        logger.debug(\"Trying to download artifact '%s' via DVC\", name)\n        rev = self.get_rev(name, version=version, stage=stage)\n\n        dirname, _ = self.parse_path(name)\n        with self.repo.switch(rev):\n            root = self.repo.fs.root_marker\n            _dirname = self.repo.fs.join(root, dirname) if dirname else root\n            with Repo(_dirname, fs=self.repo.fs, scm=self.repo.scm) as r:\n                path = r.artifacts.get_path(name)\n                path = self.repo.fs.join(root, as_posix(path))\n                path = self.repo.fs.relpath(path, self.repo.root_dir)\n                # when the `repo` is a subrepo, the path `/subrepo/myart.pkl` for dvcfs\n                # should be translated as `/myart.pkl`,\n                # i.e. 
relative to the root of the subrepo\n                path = self.repo.fs.join(root, path)\n                path = self.repo.fs.normpath(path)\n\n            out = resolve_output(path, out, force=force)\n            fs = self.repo.dvcfs\n            count = len(fs_download(fs, path, os.path.abspath(out), jobs=jobs))\n        return count, out\n\n    @staticmethod\n    def _download_studio(\n        repo_url: str,\n        name: str,\n        version: Optional[str] = None,\n        stage: Optional[str] = None,\n        out: Optional[str] = None,\n        force: bool = False,\n        jobs: Optional[int] = None,\n        dvc_studio_config: Optional[dict[str, Any]] = None,\n        **kwargs,\n    ) -> tuple[int, str]:\n        from dvc.fs import HTTPFileSystem, generic, localfs\n        from dvc.fs.callbacks import TqdmCallback\n        from dvc_studio_client.model_registry import get_download_uris\n\n        logger.debug(\"Trying to download artifact '%s' via studio\", name)\n        out = out or os.getcwd()\n        to_infos: list[str] = []\n        from_infos: list[str] = []\n        if dvc_studio_config is None:\n            dvc_studio_config = {}\n        dvc_studio_config[\"repo_url\"] = repo_url\n        try:\n            for path, url in get_download_uris(\n                repo_url,\n                name,\n                version=version,\n                stage=stage,\n                dvc_studio_config=dvc_studio_config,\n                **kwargs,\n            ).items():\n                to_info = localfs.join(out, path)\n                if localfs.exists(to_info) and not force:\n                    hint = \"\\nTo override it, re-run with '--force'.\"\n                    raise FileExistsLocallyError(  # noqa: TRY301\n                        relpath(to_info), hint=hint\n                    )\n                to_infos.append(to_info)\n                from_infos.append(url)\n        except DvcException:\n            raise\n        except Exception as exc:\n       
     raise DvcException(\n                f\"Failed to download artifact '{name}' via Studio\"\n            ) from exc\n        fs = HTTPFileSystem()\n        jobs = jobs or fs.jobs\n        with TqdmCallback(\n            desc=f\"Downloading '{name}' from '{repo_url}'\",\n            unit=\"files\",\n        ) as cb:\n            cb.set_size(len(from_infos))\n            generic.copy(\n                fs, from_infos, localfs, to_infos, callback=cb, batch_size=jobs\n            )\n\n        return len(to_infos), relpath(localfs.commonpath(to_infos))\n\n    @classmethod\n    def get(\n        cls,\n        url: str,\n        name: str,\n        version: Optional[str] = None,\n        stage: Optional[str] = None,\n        config: Optional[Union[str, dict[str, Any]]] = None,\n        remote: Optional[str] = None,\n        remote_config: Optional[Union[str, dict[str, Any]]] = None,\n        out: Optional[str] = None,\n        force: bool = False,\n        jobs: Optional[int] = None,\n    ):\n        from dvc.config import Config\n        from dvc.repo import Repo\n\n        if version and stage:\n            raise InvalidArgumentError(\n                \"Artifact version and stage are mutually exclusive.\"\n            )\n\n        # NOTE: We try to download the artifact up to three times\n        # 1. via studio with studio config loaded from environment\n        # 2. via studio with studio config loaded from DVC repo 'studio'\n        #    section + environment\n        # 3. 
via DVC remote\n\n        name = _reformat_name(name)\n        saved_exc: Optional[Exception] = None\n\n        local_dvc_studio_config = Config().get(\"studio\", {})\n        args_dvc_studio_config = {}\n        if config and not isinstance(config, dict):\n            config = Config.load_file(config)\n            args_dvc_studio_config = config.get(\"studio\", {})\n\n        try:\n            logger.trace(\"Trying studio-only config\")\n            return cls._download_studio(\n                url,\n                name,\n                version=version,\n                stage=stage,\n                out=out,\n                force=force,\n                jobs=jobs,\n                dvc_studio_config=local_dvc_studio_config | args_dvc_studio_config,\n            )\n        except FileExistsLocallyError:\n            raise\n        except Exception as exc:  # noqa: BLE001\n            saved_exc = exc\n\n        with Repo.open(\n            url=url,\n            subrepos=True,\n            uninitialized=True,\n            config=config,\n            remote=remote,\n            remote_config=remote_config,\n        ) as repo:\n            logger.trace(\"Trying repo [studio] config\")\n            repo_dvc_studio_config = repo.config.get(\"studio\", {})\n            try:\n                return cls._download_studio(\n                    url,\n                    name,\n                    version=version,\n                    stage=stage,\n                    out=out,\n                    force=force,\n                    jobs=jobs,\n                    dvc_studio_config=local_dvc_studio_config\n                    | repo_dvc_studio_config\n                    | args_dvc_studio_config,\n                )\n            except FileExistsLocallyError:\n                raise\n            except Exception as exc:  # noqa: BLE001\n                saved_exc = exc\n\n            try:\n                return repo.artifacts.download(\n                    name,\n                 
   version=version,\n                    stage=stage,\n                    out=out,\n                    force=force,\n                    jobs=jobs,\n                )\n            except FileExistsLocallyError:\n                raise\n            except Exception as exc:\n                if saved_exc:\n                    logger.exception(str(saved_exc), exc_info=saved_exc.__cause__)\n                raise DvcException(\n                    f\"Failed to download artifact '{name}' via DVC remote\"\n                ) from exc\n"
  },
  {
    "path": "dvc/repo/brancher.py",
    "content": "from collections.abc import Iterator\nfrom contextlib import contextmanager\nfrom typing import TYPE_CHECKING, Optional\n\nfrom scmrepo.git import Git\n\nfrom dvc.exceptions import NotDvcRepoError\nfrom dvc.log import logger\nfrom dvc.scm import iter_revs\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\nlogger = logger.getChild(__name__)\n\n\ndef brancher(\n    self,\n    revs=None,\n    all_branches=False,\n    all_tags=False,\n    all_commits=False,\n    all_experiments=False,\n    workspace=True,\n    commit_date: Optional[str] = None,\n    sha_only=False,\n    num=1,\n):\n    \"\"\"Generator that iterates over specified revisions.\n\n    Args:\n        revs (list): a list of revisions to iterate over.\n        all_branches (bool): iterate over all available branches.\n        all_commits (bool): iterate over all commits.\n        all_tags (bool): iterate over all available tags.\n        workspace (bool): include workspace.\n        commit_date (str): Keep experiments from commits made on or after\n                            a certain date. Date must match the extended\n                            ISO 8601 format (YYYY-MM-DD).\n        sha_only (bool): only return git SHA for a revision.\n\n    Yields:\n        str: the display name for the currently selected fs, it could be:\n            - a git revision identifier\n            - an empty string if there are no branches to iterate over\n            - \"workspace\" if there are uncommitted changes in the SCM repo\n    \"\"\"\n    if not any(\n        [\n            revs,\n            all_branches,\n            all_tags,\n            all_commits,\n            all_experiments,\n            commit_date,\n        ]\n    ):\n        yield \"\"\n        return\n\n    from dvc.fs import LocalFileSystem\n\n    repo_root_parts: tuple[str, ...] 
= ()\n    if self.fs.isin(self.root_dir, self.scm.root_dir):\n        repo_root_parts = self.fs.relparts(self.root_dir, self.scm.root_dir)\n\n    cwd_parts: tuple[str, ...] = ()\n    if self.fs.isin(self.fs.getcwd(), self.scm.root_dir):\n        cwd_parts = self.fs.relparts(self.fs.getcwd(), self.scm.root_dir)\n\n    saved_fs = self.fs\n    saved_root = self.root_dir\n    saved_dvc_dir = self.dvc_dir\n\n    scm = self.scm\n\n    logger.trace(\"switching fs to workspace\")\n    self.fs = LocalFileSystem(url=self.root_dir)\n    if workspace:\n        yield \"workspace\"\n\n    revs = revs.copy() if revs else []\n    if \"workspace\" in revs:\n        revs.remove(\"workspace\")\n\n    found_revs = iter_revs(\n        scm,\n        revs,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n        all_experiments=all_experiments,\n        commit_date=commit_date,\n        num=num,\n    )\n\n    try:\n        for sha, names in found_revs.items():\n            try:\n                _switch_fs(self, sha, repo_root_parts, cwd_parts)\n                yield sha if sha_only else \",\".join(names)\n            except NotDvcRepoError:\n                # ignore revs that don't contain repo root\n                # (i.e. 
revs from before a subdir=True repo was init'ed)\n                pass\n    finally:\n        self.fs = saved_fs\n        self.root_dir = saved_root\n        self.dvc_dir = saved_dvc_dir\n        self._reset()\n\n\ndef _switch_fs(\n    repo: \"Repo\",\n    rev: str,\n    repo_root_parts: tuple[str, ...],\n    cwd_parts: tuple[str, ...],\n):\n    from dvc.fs import GitFileSystem, LocalFileSystem\n\n    if rev == \"workspace\":\n        logger.trace(\"switching fs to workspace\")\n        repo.fs = LocalFileSystem(url=repo.root_dir)\n        return\n\n    logger.trace(\"switching fs to revision %s\", rev[:7])\n    assert isinstance(repo.scm, Git)\n    fs = GitFileSystem(scm=repo.scm, rev=rev)\n    root_dir = repo.fs.join(\"/\", *repo_root_parts)\n    if not fs.exists(root_dir):\n        raise NotDvcRepoError(f\"Commit '{rev[:7]}' does not contain a DVC repo\")\n\n    repo.fs = fs\n    repo.root_dir = root_dir\n    repo.dvc_dir = fs.join(root_dir, repo.DVC_DIR)\n    repo._reset()\n\n    if cwd_parts:\n        cwd = repo.fs.join(\"/\", *cwd_parts)\n        repo.fs.chdir(cwd)\n\n\n@contextmanager\ndef switch(repo: \"Repo\", rev: str) -> Iterator[str]:\n    \"\"\"Switch to a specific revision.\"\"\"\n    from dvc.scm import resolve_rev\n\n    if rev != \"workspace\":\n        rev = resolve_rev(repo.scm, rev)\n\n    repo_root_parts: tuple[str, ...] = ()\n    if repo.fs.isin(repo.root_dir, repo.scm.root_dir):\n        repo_root_parts = repo.fs.relparts(repo.root_dir, repo.scm.root_dir)\n\n    cwd_parts: tuple[str, ...] = ()\n    if repo.fs.isin(repo.fs.getcwd(), repo.scm.root_dir):\n        cwd_parts = repo.fs.relparts(repo.fs.getcwd(), repo.scm.root_dir)\n\n    saved_fs = repo.fs\n    saved_root = repo.root_dir\n    saved_dvc_dir = repo.dvc_dir\n    try:\n        _switch_fs(repo, rev, repo_root_parts, cwd_parts)\n        yield rev\n    finally:\n        repo.fs = saved_fs\n        repo.root_dir = saved_root\n        repo.dvc_dir = saved_dvc_dir\n        repo._reset()\n"
  },
  {
    "path": "dvc/repo/cache.py",
    "content": "import os\n\n\ndef check_missing(repo, rev=None, max_size=None, types=None):\n    from dvc_data.index import StorageKeyError\n\n    with repo.switch(rev or \"workspace\"):\n        idx = repo.index.targets_view(None, max_size=max_size, types=types)\n\n        index = idx.data[\"repo\"]\n\n    def onerror(_entry, _exc):\n        pass\n\n    index.onerror = onerror\n\n    ret = []\n    for _, entry in index.iteritems():\n        try:\n            fs, path = index.storage_map.get_cache(entry)\n        except (StorageKeyError, ValueError):\n            continue\n\n        if not fs.exists(path):\n            typ = \"directory\" if (entry.meta and entry.meta.isdir) else \"file\"\n            ret.append(\n                (\n                    typ,\n                    entry.hash_info.name,\n                    entry.hash_info.value,\n                    os.path.join(*entry.key),\n                )\n            )\n\n    return ret\n"
  },
  {
    "path": "dvc/repo/checkout.py",
    "content": "import os\nfrom collections import defaultdict\nfrom typing import TYPE_CHECKING\n\nfrom dvc.exceptions import (\n    CheckoutError,\n    CheckoutErrorSuggestGit,\n    DvcException,\n    NoOutputOrStageError,\n)\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc.utils import relpath\n\nfrom . import locked\n\nif TYPE_CHECKING:\n    from dvc.repo.index import IndexView\n    from dvc_data.index import BaseDataIndex, DataIndexEntry, DataIndexKey\n    from dvc_data.index.diff import Change\n    from dvc_objects.fs.base import FileSystem\n\nlogger = logger.getChild(__name__)\n\n\ndef _fspath_dir(path):\n    if not os.path.exists(str(path)):\n        return str(path)\n\n    path = relpath(path)\n    return os.path.join(path, \"\") if os.path.isdir(path) else path\n\n\ndef _remove_unused_links(repo):\n    used = [out.fspath for out in repo.index.outs if out.protocol == \"local\"]\n    unused = repo.state.get_unused_links(used, repo.fs)\n    ret = [_fspath_dir(u) for u in unused]\n    repo.state.remove_links(unused, repo.fs)\n    return ret\n\n\ndef _build_out_changes(\n    index: \"IndexView\", changes: dict[\"DataIndexKey\", \"Change\"]\n) -> dict[\"DataIndexKey\", tuple[str, dict[str, int]]]:\n    from dvc_data.index.checkout import MODIFY\n\n    out_keys: list[DataIndexKey] = []\n    for out in index.outs:\n        if not out.use_cache:\n            continue\n\n        ws, key = out.index_key\n        if ws != \"repo\":\n            continue\n        out_keys.append(key)\n\n    out_stats: dict[DataIndexKey, dict[str, int]]\n    out_stats = defaultdict(lambda: defaultdict(int))\n\n    out_changes: dict[DataIndexKey, tuple[str, dict[str, int]]] = {}\n    for key, change in changes.items():\n        typ = change.typ\n        isdir = change.new and change.new.isdir\n        for out_key in out_keys:\n            if len(out_key) > len(key) or key[: len(out_key)] != out_key:\n                continue\n\n            stats = out_stats[out_key]\n        
    if not isdir:\n                stats[typ] += 1\n\n            if key == out_key:\n                out_changes[out_key] = typ, stats\n            elif out_key not in out_changes:\n                typ = MODIFY\n                out_changes[out_key] = typ, stats\n            break\n\n    return out_changes\n\n\ndef _check_can_delete(\n    entries: list[\"DataIndexEntry\"],\n    index: \"BaseDataIndex\",\n    path: str,\n    fs: \"FileSystem\",\n):\n    entry_paths = []\n    for entry in entries:\n        try:\n            cache_fs, cache_path = index.storage_map.get_cache(entry)\n        except ValueError:\n            continue\n\n        if cache_fs.exists(cache_path):\n            continue\n\n        entry_paths.append(fs.join(path, *(entry.key or ())))\n\n    if not entry_paths:\n        return\n\n    raise DvcException(\n        \"Can't remove the following unsaved files without confirmation. \"\n        \"Use `--force` to force.\\n\" + \"\\n\".join(entry_paths)\n    )\n\n\n@locked\ndef checkout(  # noqa: C901\n    self,\n    targets=None,\n    with_deps=False,\n    force=False,\n    relink=False,\n    recursive=False,\n    allow_missing=False,\n    **kwargs,\n):\n    from dvc.repo.index import build_data_index\n    from dvc.stage.exceptions import StageFileBadNameError, StageFileDoesNotExistError\n    from dvc_data.index.checkout import ADD, DELETE, MODIFY, apply, compare\n\n    stats = {\"modified\": 0, \"added\": 0, \"deleted\": 0}\n    changes: dict[str, list[str]] = {\"modified\": [], \"added\": [], \"deleted\": []}\n\n    if not targets:\n        targets = [None]\n        changes[\"deleted\"] = _remove_unused_links(self)\n        stats[\"deleted\"] = len(changes[\"deleted\"])\n\n    if isinstance(targets, str):\n        targets = [targets]\n\n    def onerror(target, exc):\n        if target and isinstance(\n            exc,\n            (StageFileDoesNotExistError, StageFileBadNameError, NoOutputOrStageError),\n        ):\n            raise 
CheckoutErrorSuggestGit(target) from exc\n        raise  # noqa: PLE0704\n\n    from .index import index_from_targets\n\n    view = index_from_targets(\n        self, targets=targets, recursive=recursive, with_deps=with_deps, onerror=onerror\n    )\n\n    with ui.progress(unit=\"entry\", desc=\"Building workspace index\", leave=True) as pb:\n        old = build_data_index(\n            view, self.root_dir, self.fs, compute_hash=True, callback=pb.as_callback()\n        )\n\n    new = view.data[\"repo\"]\n\n    with ui.progress(desc=\"Comparing indexes\", unit=\"entry\", leave=True) as pb:\n        diff = compare(old, new, relink=relink, delete=True, callback=pb.as_callback())\n\n    if not force:\n        _check_can_delete(diff.files_delete, new, self.root_dir, self.fs)\n\n    failed = set()\n    out_paths = [out.fs_path for out in view.outs if out.use_cache and out.is_in_repo]\n\n    def checkout_onerror(src_path, dest_path, _exc):\n        logger.debug(\n            \"failed to create '%s' from '%s'\",\n            dest_path,\n            src_path,\n            exc_info=True,  # noqa: LOG014\n        )\n\n        for out_path in out_paths:\n            if self.fs.isin_or_eq(dest_path, out_path):\n                failed.add(out_path)\n\n    with ui.progress(unit=\"file\", desc=\"Applying changes\", leave=True) as pb:\n        apply(\n            diff,\n            self.root_dir,\n            self.fs,\n            callback=pb.as_callback(),\n            update_meta=False,\n            onerror=checkout_onerror,\n            state=self.state,\n            **kwargs,\n        )\n\n    out_changes = _build_out_changes(view, diff.changes)\n\n    typ_map = {ADD: \"added\", DELETE: \"deleted\", MODIFY: \"modified\"}\n    for key, (typ, _stats) in out_changes.items():\n        out_path = self.fs.join(self.root_dir, *key)\n\n        if out_path in failed:\n            self.fs.remove(out_path, recursive=True)\n            continue\n\n        self.state.save_link(out_path, 
self.fs)\n        for t, count in _stats.items():\n            stats_typ = typ_map[t]\n            stats[stats_typ] += count\n\n        changes[typ_map[typ]].append(_fspath_dir(out_path))\n\n    for changelist in changes.values():\n        # group directories first, then files. But keep them alphabetically sorted\n        changelist.sort(key=lambda p: (not p.endswith(os.sep), p))\n\n    result = changes | {\"stats\": stats}\n    if failed and not allow_missing:\n        result[\"failed\"] = [relpath(out_path) for out_path in failed]\n        raise CheckoutError([relpath(out_path) for out_path in failed], result)\n    return result\n"
  },
  {
    "path": "dvc/repo/collect.py",
    "content": "from collections.abc import Iterable\nfrom typing import TYPE_CHECKING, Callable, Optional\n\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from dvc.output import Output\n    from dvc.repo import Repo\n\nlogger = logger.getChild(__name__)\n\n\nFilterFn = Callable[[\"Output\"], bool]\nOutputs = list[\"Output\"]\nStrPaths = list[str]\n\n\ndef _collect_outs(\n    repo: \"Repo\", output_filter: Optional[FilterFn] = None, deps: bool = False\n) -> Outputs:\n    index = repo.index\n    index.check_graph()  # ensure graph is correct\n    return list(filter(output_filter, index.deps if deps else index.outs))\n\n\ndef _collect_paths(\n    repo: \"Repo\",\n    targets: Iterable[str],\n    recursive: bool = False,\n) -> StrPaths:\n    from dvc.fs.dvc import DVCFileSystem\n\n    fs = DVCFileSystem(repo=repo)\n    fs_paths = [fs.from_os_path(target) for target in targets]\n\n    target_paths: StrPaths = []\n    for fs_path in fs_paths:\n        if recursive and fs.isdir(fs_path):\n            target_paths.extend(fs.find(fs_path))\n        target_paths.append(fs_path)\n\n    return target_paths\n\n\ndef _filter_outs(\n    repo: \"Repo\", outs: Outputs, fs_paths: StrPaths, duplicates=False\n) -> tuple[Outputs, StrPaths]:\n    res_outs: Outputs = []\n    fs_res_paths = fs_paths\n\n    for out in outs:\n        fs_path = repo.dvcfs.from_os_path(out.fs_path)\n        if fs_path in fs_paths:\n            res_outs.append(out)\n            if not duplicates:\n                # MUTATING THE SAME LIST!!\n                fs_res_paths.remove(fs_path)\n\n    return res_outs, fs_res_paths\n\n\ndef collect(\n    repo: \"Repo\",\n    deps: bool = False,\n    targets: Optional[Iterable[str]] = None,\n    output_filter: Optional[FilterFn] = None,\n    recursive: bool = False,\n    duplicates: bool = False,\n) -> tuple[Outputs, StrPaths]:\n    assert targets or output_filter\n\n    outs: Outputs = _collect_outs(repo, output_filter=output_filter, deps=deps)\n\n    if not 
targets:\n        fs_paths: StrPaths = []\n        return outs, fs_paths\n\n    target_paths = _collect_paths(repo, targets, recursive=recursive)\n\n    return _filter_outs(repo, outs, target_paths, duplicates=duplicates)\n"
  },
  {
    "path": "dvc/repo/commit.py",
    "content": "from itertools import groupby\nfrom typing import TYPE_CHECKING\n\nfrom dvc import prompt\n\nfrom . import locked\nfrom .scm_context import scm_context\n\nif TYPE_CHECKING:\n    from . import Repo\n    from .index import IndexView\n\n\ndef _prepare_message(stage, changes):\n    changed_deps, changed_outs, changed_stage = changes\n    if changed_deps and changed_outs:\n        msg = \"dependencies {deps} and outputs {outs} of {stage} changed.\"\n    elif changed_deps:\n        msg = \"dependencies {deps} of {stage} changed.\"\n    elif changed_outs:\n        msg = \"outputs {outs} of {stage} changed.\"\n    else:\n        msg = \"{stage_changed}\"\n\n    msg += \" Are you sure you want to commit it?\"\n\n    kw = {\n        \"stage\": stage,\n        \"deps\": changed_deps,\n        \"outs\": changed_outs,\n        \"stage_changed\": changed_stage,\n    }\n    return msg.format_map(kw)\n\n\ndef prompt_to_commit(stage, changes, force=False):\n    from dvc.stage.exceptions import StageCommitError\n\n    if not (force or prompt.confirm(_prepare_message(stage, changes))):\n        raise StageCommitError(\n            f\"unable to commit changed {stage}. 
Use `-f|--force` to force.\"\n        )\n\n\n@locked\ndef commit(\n    self,\n    target=None,\n    with_deps=False,\n    recursive=False,\n    force=False,\n    allow_missing=False,\n    data_only=False,\n    relink=True,\n):\n    committed_stages = []\n    groups = groupby(\n        [\n            info\n            for info in self.stage.collect_granular(\n                target, with_deps=with_deps, recursive=recursive\n            )\n            if not data_only or info.stage.is_data_source\n        ],\n        key=lambda info: info.stage.dvcfile,\n    )\n\n    for dvcfile, stages_info_group in groups:\n        to_dump = []\n        for stage_info in stages_info_group:\n            stage = stage_info.stage\n            if force:\n                stage.save(allow_missing=allow_missing)\n            else:\n                changes = stage.changed_entries()\n                if any(changes):\n                    prompt_to_commit(stage, changes, force=force)\n                    stage.save(allow_missing=allow_missing)\n            stage.commit(\n                filter_info=stage_info.filter_info,\n                allow_missing=allow_missing,\n                relink=relink,\n            )\n            to_dump.append(stage)\n        dvcfile.dump_stages(to_dump, update_pipeline=False)\n        committed_stages.extend(to_dump)\n    return committed_stages\n\n\n@locked\n@scm_context\ndef commit_2_to_3(repo: \"Repo\", dry: bool = False):\n    \"\"\"Force-commit all legacy outputs to use DVC 3.0 hashes.\"\"\"\n    from dvc.dvcfile import ProjectFile\n    from dvc.ui import ui\n\n    view = repo.index.targets_view(\n        targets=None,\n        outs_filter=lambda o: o.hash_name == \"md5-dos2unix\",\n        recursive=True,\n    )\n    migrated = _migrateable_dvcfiles(view)\n    if not migrated:\n        ui.write(\"No DVC files in the repo to migrate to the 3.0 format.\")\n        return\n    if dry:\n        ui.write(\"Entries in following DVC files will be migrated to the 
3.0 format:\")\n        ui.write(\"\\n\".join(sorted(f\"\\t{file}\" for file in migrated)))\n        return\n    for stage, filter_info in view._stage_infos:\n        outs_filter = view._outs_filter\n        outs = {\n            out\n            for out in stage.filter_outs(filter_info)\n            if outs_filter is not None and outs_filter(out)\n        }\n        modified = False\n        if outs:\n            for out in outs:\n                out.update_legacy_hash_name(force=True)\n            modified = True\n        deps = {dep for dep in stage.deps if not stage.is_import and dep.is_in_repo}\n        if deps:\n            for dep in deps:\n                dep.update_legacy_hash_name(force=True)\n            modified = True\n        if modified:\n            stage.save(allow_missing=True)\n            stage.commit(allow_missing=True, relink=True)\n            if not isinstance(stage.dvcfile, ProjectFile):\n                ui.write(f\"Updating DVC file '{stage.dvcfile.relpath}'\")\n            stage.dump(update_pipeline=False)\n\n\ndef _migrateable_dvcfiles(view: \"IndexView\") -> set[str]:\n    from dvc.dvcfile import ProjectFile\n\n    migrated = set()\n    for stage, filter_info in view._stage_infos:\n        outs_filter = view._outs_filter\n        dvcfile = stage.dvcfile.relpath\n        assert outs_filter\n        if any(outs_filter(out) for out in stage.filter_outs(filter_info)) or (\n            not stage.is_import\n            and any(\n                dep.is_in_repo and dep.hash_name == \"md5-dos2unix\" for dep in stage.deps\n            )\n        ):\n            if isinstance(stage.dvcfile, ProjectFile):\n                lockfile = stage.dvcfile._lockfile.relpath\n                migrated.add(f\"{dvcfile} ({lockfile})\")\n            else:\n                migrated.add(dvcfile)\n    return migrated\n"
  },
  {
    "path": "dvc/repo/data.py",
    "content": "import os\nimport posixpath\nfrom collections import defaultdict, deque\nfrom collections.abc import Iterable, Iterator, Mapping\nfrom typing import TYPE_CHECKING, Optional, TypedDict, Union\n\nfrom dvc.fs.callbacks import DEFAULT_CALLBACK, Callback, TqdmCallback\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc_data.index import DataIndexDirError\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.scm import Git, NoSCM\n    from dvc_data.index import (\n        BaseDataIndex,\n        DataIndex,\n        DataIndexEntry,\n        DataIndexKey,\n        DataIndexView,\n    )\n    from dvc_data.index.diff import Change\n    from dvc_objects.fs.base import FileSystem\n\nlogger = logger.getChild(__name__)\n\n\ndef posixpath_to_os_path(path: str) -> str:\n    return path.replace(posixpath.sep, os.path.sep)\n\n\ndef _adapt_path(change: \"Change\") -> str:\n    isdir = False\n    if change.new and change.new.meta:\n        isdir = change.new.meta.isdir\n    elif change.old and change.old.meta:\n        isdir = change.old.meta.isdir\n    key = change.key\n    if isdir:\n        key = (*key, \"\")\n    return os.path.sep.join(key)\n\n\ndef _adapt_path_from_entry(entry: \"DataIndexEntry\") -> str:\n    key = entry.key\n    assert key\n    if entry.meta and entry.meta.isdir:\n        key = (*key, \"\")\n    return os.sep.join(key)\n\n\ndef _get_missing_paths(\n    to_check: Mapping[\"FileSystem\", Mapping[str, Iterable[\"DataIndexEntry\"]]],\n    batch_size: Optional[int] = None,\n    callback: \"Callback\" = DEFAULT_CALLBACK,\n) -> Iterator[str]:\n    for fs, paths_map in to_check.items():\n        if batch_size == 1 or (batch_size is None and fs.protocol == \"local\"):\n            results = list(callback.wrap(map(fs.exists, paths_map)))\n        else:\n            results = fs.exists(\n                list(paths_map), batch_size=batch_size, callback=callback\n            )\n\n        for cache_path, exists in zip(paths_map, 
results):\n            if exists:\n                continue\n\n            for entry in paths_map[cache_path]:\n                yield _adapt_path_from_entry(entry)\n\n\nclass StorageCallback(Callback):\n    def __init__(self, parent_cb: Callback) -> None:\n        super().__init__(size=0, value=0)\n        self.parent_cb = parent_cb\n\n    def set_size(self, size: int) -> None:\n        # This is a no-op to prevent `fs.exists` from trying to set the size\n        pass\n\n    def relative_update(self, value: int = 1) -> None:\n        self.parent_cb.relative_update(value)\n\n    def absolute_update(self, value: int) -> None:\n        self.parent_cb.relative_update(value - self.value)\n\n\nclass Rename(TypedDict):\n    old: str\n    new: str\n\n\nclass DiffResult(TypedDict, total=False):\n    modified: list[str]\n    added: list[str]\n    deleted: list[str]\n    renamed: list[Rename]\n    unchanged: list[str]\n    unknown: list[str]\n    not_in_cache: list[str]\n\n\ndef _diff(\n    old: \"BaseDataIndex\",\n    new: \"BaseDataIndex\",\n    *,\n    filter_keys: Optional[Iterable[\"DataIndexKey\"]] = None,\n    granular: bool = False,\n    not_in_cache: bool = False,\n    batch_size: Optional[int] = None,\n    callback: \"Callback\" = DEFAULT_CALLBACK,\n    with_renames: bool = False,\n) -> DiffResult:\n    from dvc_data.index.diff import (\n        ADD,\n        DELETE,\n        MODIFY,\n        RENAME,\n        UNCHANGED,\n        UNKNOWN,\n        diff,\n    )\n\n    ret: DiffResult = defaultdict(list)  # type: ignore[assignment]\n    change_types = {\n        MODIFY: \"modified\",\n        ADD: \"added\",\n        DELETE: \"deleted\",\n        RENAME: \"renamed\",\n    }\n\n    to_check: dict[FileSystem, dict[str, list[DataIndexEntry]]] = defaultdict(\n        lambda: defaultdict(list)\n    )\n\n    for change in diff(\n        old,\n        new,\n        with_unchanged=True,\n        shallow=not granular,\n        hash_only=True,\n        with_unknown=True,\n       
 with_renames=with_renames,\n        callback=callback,\n    ):\n        typ = change.typ\n\n        # The index is a trie, so even when we filter by a specific path\n        # like `dir/file`, all parent nodes leading to that path (e.g., `dir/`)\n        # still appear in the view. As a result, keys like `dir/` will be present\n        # even if only `dir/file` matches the filter.\n        # We need to skip such entries to avoid showing root of tracked directories.\n        if filter_keys:\n            # RENAME does not have a `change.key`\n            if typ == RENAME:\n                assert change.new\n                key = change.new.key\n                # match with \"new\" key only\n                assert key\n            else:\n                key = change.key\n\n            if not any(key[: len(fk)] == fk for fk in filter_keys):\n                continue\n\n        if (\n            typ == UNCHANGED\n            and (not change.old or not change.old.hash_info)\n            and (not change.new or not change.new.hash_info)\n        ):\n            # NOTE: emulating previous behaviour\n            continue\n\n        if typ == UNKNOWN and not change.new:\n            # NOTE: emulating previous behaviour\n            continue\n\n        if not_in_cache and change.old and change.old.hash_info:\n            old_entry = change.old\n            cache_fs, cache_path = old.storage_map.get_cache(old_entry)\n            # check later in batches\n            to_check[cache_fs][cache_path].append(old_entry)\n\n        change_typ = change_types.get(typ, typ)\n        if typ == RENAME:\n            assert change.old is not None\n            assert change.new is not None\n            ret[\"renamed\"].append(\n                {\n                    \"old\": _adapt_path_from_entry(change.old),\n                    \"new\": _adapt_path_from_entry(change.new),\n                }\n            )\n        else:\n            ret[change_typ].append(_adapt_path(change))  # type: 
ignore[literal-required]\n\n    total_items = sum(\n        len(entries) for paths in to_check.values() for entries in paths.values()\n    )\n    with TqdmCallback(size=total_items, desc=\"Checking cache\", unit=\"entry\") as cb:\n        missing_items = list(\n            _get_missing_paths(\n                to_check, batch_size=batch_size, callback=StorageCallback(cb)\n            ),\n        )\n        if missing_items:\n            ret[\"not_in_cache\"] = missing_items\n    return dict(ret)  # type: ignore[return-value]\n\n\nclass GitInfo(TypedDict, total=False):\n    staged: dict[str, list[str]]\n    unstaged: dict[str, list[str]]\n    untracked: list[str]\n    is_empty: bool\n    is_dirty: bool\n\n\ndef _git_info(scm: Union[\"Git\", \"NoSCM\"], untracked_files: str = \"all\") -> GitInfo:\n    from scmrepo.exceptions import SCMError\n\n    from dvc.scm import NoSCM\n\n    if isinstance(scm, NoSCM):\n        return {}\n\n    try:\n        scm.get_rev()\n    except SCMError:\n        empty_repo = True\n    else:\n        empty_repo = False\n\n    staged, unstaged, untracked = scm.status(untracked_files=untracked_files)\n    if os.name == \"nt\":\n        untracked = [posixpath_to_os_path(path) for path in untracked]\n    # NOTE: order is important here.\n    return GitInfo(\n        staged=staged,\n        unstaged=unstaged,\n        untracked=untracked,\n        is_empty=empty_repo,\n        is_dirty=any([staged, unstaged, untracked]),\n    )\n\n\ndef filter_index(\n    index: Union[\"DataIndex\", \"DataIndexView\"],\n    filter_keys: Optional[Iterable[\"DataIndexKey\"]] = None,\n) -> \"BaseDataIndex\":\n    from dvc_data.index.view import DataIndexView\n\n    if not filter_keys:\n        return index\n\n    if isinstance(index, DataIndexView):\n        orig_index = index._index\n        parent_filter_fn = index.filter_fn\n    else:\n        orig_index = index\n        parent_filter_fn = None\n\n    def filter_fn(key: \"DataIndexKey\") -> bool:\n        if 
parent_filter_fn is not None and not parent_filter_fn(key):\n            return False\n\n        for filter_key in filter_keys:\n            # eg: if key is \"dir/file\" and filter_key is \"dir/\", return True\n            if key[: len(filter_key)] == filter_key:\n                return True\n            # eg: if key is `dir/` and filter_key is `dir/file`, also return True.\n            # This ensures we include parent prefixes needed to reach matching leaves.\n            # Intermediate prefixes must be retained to access nested keys.\n            if filter_key[: len(key)] == key:\n                return True\n        return False\n\n    from dvc_data.index import view\n\n    return view(orig_index, filter_fn=filter_fn)\n\n\ndef _diff_index_to_wtree(\n    repo: \"Repo\",\n    filter_keys: Optional[Iterable[\"DataIndexKey\"]] = None,\n    granular: bool = False,\n    batch_size: Optional[int] = None,\n    with_renames: bool = False,\n) -> DiffResult:\n    from .index import build_data_index\n\n    with ui.progress(desc=\"Building workspace index\", unit=\"entry\") as pb:\n        workspace = build_data_index(\n            repo.index,\n            repo.root_dir,\n            repo.fs,\n            compute_hash=True,\n            callback=pb.as_callback(),\n        )\n        workspace_view = filter_index(workspace, filter_keys=filter_keys)\n\n    with ui.progress(\n        desc=\"Calculating diff between index/workspace\",\n        unit=\"entry\",\n    ) as pb:\n        index = repo.index.data[\"repo\"]\n        view = filter_index(index, filter_keys=filter_keys)\n        return _diff(\n            view,\n            workspace_view,\n            filter_keys=filter_keys,\n            granular=granular,\n            not_in_cache=True,\n            with_renames=with_renames,\n            batch_size=batch_size,\n            callback=pb.as_callback(),\n        )\n\n\ndef _diff_head_to_index(\n    repo: \"Repo\",\n    head: str = \"HEAD\",\n    filter_keys: 
Optional[Iterable[\"DataIndexKey\"]] = None,\n    granular: bool = False,\n    with_renames: bool = False,\n) -> DiffResult:\n    from dvc.exceptions import NotDvcRepoError\n    from dvc.scm import RevError\n    from dvc_data.index import DataIndex\n\n    index = repo.index.data[\"repo\"]\n    index_view = filter_index(index, filter_keys=filter_keys)\n\n    try:\n        with repo.switch(head):\n            head_index = repo.index.data[\"repo\"]\n            head_view = filter_index(head_index, filter_keys=filter_keys)\n    except RevError:\n        logger.debug(\"failed to switch to '%s'\", head)\n        head_view = DataIndex()\n    except NotDvcRepoError as exc:\n        # NOTE: this only gets raised on subdir repos at the moment,\n        # which looks like a bug in `repo.switch`.\n        logger.warning(exc)\n        head_view = DataIndex()\n\n    with ui.progress(desc=\"Calculating diff between head/index\", unit=\"entry\") as pb:\n        return _diff(\n            head_view,\n            index_view,\n            filter_keys=filter_keys,\n            granular=granular,\n            with_renames=with_renames,\n            callback=pb.as_callback(),\n        )\n\n\nclass Status(TypedDict):\n    not_in_cache: list[str]\n    not_in_remote: list[str]\n    committed: DiffResult\n    uncommitted: DiffResult\n    untracked: list[str]\n    unchanged: list[str]\n    git: GitInfo\n\n\ndef _transform_git_paths_to_dvc(repo: \"Repo\", files: Iterable[str]) -> list[str]:\n    \"\"\"Transform files rel. to Git root to DVC root, and drop outside files.\"\"\"\n    rel = repo.fs.relpath(repo.root_dir, repo.scm.root_dir).rstrip(\"/\")\n\n    # if we have repo root in a different location than scm's root,\n    # i.e. subdir repo, all git_paths need to be transformed rel. 
to the DVC\n    # repo root and anything outside need to be filtered out.\n    if rel not in (os.curdir, \"\"):\n        prefix = rel + os.sep\n        length = len(prefix)\n        files = (file[length:] for file in files if file.startswith(prefix))\n\n    start = repo.fs.relpath(repo.fs.getcwd(), repo.root_dir)\n    if start in (os.curdir, \"\"):\n        return list(files)\n    # we need to convert repo relative paths to curdir relative.\n    return [repo.fs.relpath(file, start) for file in files]\n\n\ndef iter_index(\n    index: Union[\"BaseDataIndex\", \"DataIndexView\"], shallow: bool = False\n) -> Iterator[tuple[\"DataIndexKey\", Optional[\"DataIndexEntry\"]]]:\n    if not shallow:\n        yield from index.iteritems(shallow=shallow)\n        return\n\n    # only iterate until we find entries with hash_info in shallow mode\n    todo: deque[tuple[DataIndexKey, dict]] = deque([((), index.info(()))])\n    while todo:\n        key, info = todo.popleft()\n        entry = info.get(\"entry\")\n        if info.get(\"type\") == \"directory\" and not (entry and entry.hash_info):\n            try:\n                todo.extend(index.ls(key, detail=True))\n            except (KeyError, DataIndexDirError):\n                pass\n        yield key, entry\n\n\ndef _get_entries_not_in_remote(\n    repo: \"Repo\",\n    filter_keys: Optional[Iterable[\"DataIndexKey\"]] = None,\n    granular: bool = False,\n    remote_refresh: bool = False,\n) -> list[str]:\n    \"\"\"Get entries that are not in remote storage.\"\"\"\n    from dvc.repo.worktree import worktree_view\n    from dvc_data.index import StorageKeyError\n\n    entries: dict[DataIndexKey, DataIndexEntry] = {}\n\n    def _onerror(entry, exc):\n        if not isinstance(exc, DataIndexDirError):\n            raise exc\n        # We don't have the contents of this dir file, so we will only check this key.\n        entries[entry.key] = entry\n\n    # View into the index, with only pushable entries\n    index = 
worktree_view(repo.index, push=True)\n    data_index = index.data[\"repo\"]\n\n    orig_data_index_onerror = data_index.onerror\n    data_index.onerror = _onerror\n\n    view = filter_index(data_index, filter_keys=filter_keys)  # type: ignore[arg-type]\n\n    missing_entries = []\n\n    storage_map = view.storage_map\n\n    n = 0\n    with TqdmCallback(size=n, desc=\"Checking remote\", unit=\"entry\") as cb:\n        for key, entry in iter_index(view, shallow=not granular):\n            if not (entry and entry.hash_info):\n                continue\n\n            # The index is a trie, so even when we filter by a specific path\n            # like `dir/file`, all parent nodes leading to that path (e.g., `dir/`)\n            # still appear in the view. As a result, keys like `dir/` will be present\n            # even if only `dir/file` matches the filter.\n            # We need to skip such entries to avoid showing root of tracked directories.\n            if filter_keys and not any(\n                key[: len(filter_key)] == filter_key for filter_key in filter_keys\n            ):\n                continue\n\n            entries[key] = entry\n            n += 1\n            cb.set_size(n)\n\n        results = storage_map.bulk_remote_exists(\n            list(entries.values()), refresh=remote_refresh, callback=cb\n        )\n        for key, entry in entries.items():\n            k = (*key, \"\") if entry.meta and entry.meta.isdir else key\n            try:\n                if not results.get(entry, False):\n                    missing_entries.append(os.path.sep.join(k))\n            except StorageKeyError:\n                pass\n\n    data_index.onerror = orig_data_index_onerror\n    return missing_entries\n\n\ndef _matches_target(p: str, targets: Iterable[str]) -> bool:\n    sep = os.sep\n    return any(p == t or p.startswith(t + sep) for t in targets)\n\n\ndef _prune_keys(filter_keys: Iterable[\"DataIndexKey\"]) -> list[\"DataIndexKey\"]:\n    sorted_keys = 
sorted(set(filter_keys), key=len)\n    result: list[DataIndexKey] = []\n\n    for key in sorted_keys:\n        if not any(key[: len(prefix)] == prefix for prefix in result):\n            result.append(key)\n    return result\n\n\ndef status(  # noqa: PLR0913\n    repo: \"Repo\",\n    targets: Optional[Iterable[Union[os.PathLike[str], str]]] = None,\n    *,\n    granular: bool = False,\n    untracked_files: str = \"no\",\n    remote: Optional[str] = None,\n    not_in_remote: bool = False,\n    remote_refresh: bool = False,\n    config: Optional[dict] = None,\n    batch_size: Optional[int] = None,\n    head: str = \"HEAD\",\n    with_renames: bool = True,\n) -> Status:\n    from dvc.scm import NoSCMError, SCMError\n\n    config = config or {}\n    if remote and not_in_remote:\n        logger.debug(\"Using remote %r\", remote)\n        core = config.setdefault(\"core\", {})\n        core[\"remote\"] = remote\n    repo.config.merge(config)\n\n    targets = targets or []\n    filter_keys: list[DataIndexKey] = [repo.fs.relparts(os.fspath(t)) for t in targets]\n    # try to remove duplicate and overlapping keys\n    filter_keys = _prune_keys(filter_keys)\n\n    uncommitted_diff = _diff_index_to_wtree(\n        repo,\n        filter_keys=filter_keys,\n        granular=granular,\n        batch_size=batch_size,\n        with_renames=with_renames,\n    )\n    unchanged = set(uncommitted_diff.pop(\"unchanged\", []))\n\n    entries_not_in_remote: list[str] = []\n    if not_in_remote:\n        entries_not_in_remote = _get_entries_not_in_remote(\n            repo,\n            filter_keys=filter_keys,\n            granular=granular,\n            remote_refresh=remote_refresh,\n        )\n\n    try:\n        committed_diff = _diff_head_to_index(\n            repo,\n            filter_keys=filter_keys,\n            head=head,\n            granular=granular,\n            with_renames=with_renames,\n        )\n    except (SCMError, NoSCMError):\n        committed_diff = {}\n    
else:\n        unchanged &= set(committed_diff.pop(\"unchanged\", []))\n\n    git_info = _git_info(repo.scm, untracked_files=untracked_files)\n    scm_filter_targets = {\n        os.path.relpath(os.path.abspath(t), repo.scm.root_dir) for t in targets\n    }\n    untracked_it: Iterable[str] = git_info.get(\"untracked\", [])\n    if scm_filter_targets:\n        untracked_it = (\n            f for f in untracked_it if _matches_target(f, scm_filter_targets)\n        )\n    untracked = _transform_git_paths_to_dvc(repo, untracked_it)\n    # order matters here\n    return Status(\n        not_in_cache=uncommitted_diff.pop(\"not_in_cache\", []),\n        not_in_remote=entries_not_in_remote,\n        committed=committed_diff,\n        uncommitted=uncommitted_diff,\n        untracked=untracked,\n        unchanged=list(unchanged),\n        git=git_info,\n    )\n"
  },
  {
    "path": "dvc/repo/datasets.py",
    "content": "import os\nfrom collections.abc import Iterator, Mapping\nfrom datetime import datetime\nfrom functools import cached_property\nfrom pathlib import Path\nfrom typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union, cast\nfrom urllib.parse import urlparse\n\nfrom attrs import Attribute, AttrsInstance, asdict, evolve, field, fields, frozen\nfrom attrs.converters import default_if_none\n\nfrom dvc.dvcfile import Lockfile, ProjectFile\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.types import StrPath\nfrom dvc_data.hashfile.meta import Meta\n\nif TYPE_CHECKING:\n    from datachain.dataset import DatasetRecord, DatasetVersion  # type: ignore[import]\n    from typing_extensions import Self\n\n    from dvc.repo import Repo\n\n\nlogger = logger.getChild(__name__)\n\n\ndef _get_dataset_record(name: str) -> \"DatasetRecord\":\n    from dvc.exceptions import DvcException\n\n    try:\n        from datachain.catalog import get_catalog  # type: ignore[import]\n\n    except ImportError as exc:\n        raise DvcException(\"datachain is not installed\") from exc\n\n    catalog = get_catalog()\n    return catalog.get_remote_dataset(name)\n\n\ndef _get_dataset_info(\n    name: str, record: Optional[\"DatasetRecord\"] = None, version: Optional[int] = None\n) -> \"DatasetVersion\":\n    record = record or _get_dataset_record(name)\n    assert record\n    v = record.latest_version if version is None else version\n    assert v is not None\n    return record.get_version(v)\n\n\ndef default_str(v) -> str:\n    return default_if_none(\"\")(v)\n\n\ndef to_datetime(d: Union[str, datetime]) -> datetime:\n    return datetime.fromisoformat(d) if isinstance(d, str) else d\n\n\ndef ensure(cls):\n    def inner(v):\n        return cls.from_dict(v) if isinstance(v, dict) else v\n\n    return inner\n\n\nclass SerDe:\n    def to_dict(self: AttrsInstance) -> dict[str, Any]:\n        def filter_defaults(attr: Attribute, v: Any):\n            
if attr.metadata.get(\"exclude_falsy\", False) and not v:\n                return False\n            return attr.default != v\n\n        def value_serializer(_inst, _field, v):\n            return v.isoformat() if isinstance(v, datetime) else v\n\n        return asdict(self, filter=filter_defaults, value_serializer=value_serializer)\n\n    @classmethod\n    def from_dict(cls, d: dict[str, Any]) -> \"Self\":\n        _fields = fields(cast(\"type[AttrsInstance]\", cls))\n        kwargs = {f.name: d[f.name] for f in _fields if f.name in d}\n        return cls(**kwargs)\n\n\n@frozen(kw_only=True)\nclass DatasetSpec(SerDe):\n    name: str\n    url: str\n    type: Literal[\"dvc\", \"dc\", \"url\"]\n\n\n@frozen(kw_only=True)\nclass DVCDatasetSpec(DatasetSpec):\n    type: Literal[\"dvc\"]\n    path: str = field(default=\"\", converter=default_str)\n    rev: Optional[str] = None\n\n\n@frozen(kw_only=True, order=True)\nclass FileInfo(SerDe):\n    relpath: str\n    meta: Meta = field(order=False, converter=ensure(Meta))  # type: ignore[misc]\n\n\n@frozen(kw_only=True)\nclass DVCDatasetLock(DVCDatasetSpec):\n    rev_lock: str\n\n\n@frozen(kw_only=True)\nclass DatachainDatasetLock(DatasetSpec):\n    version: int\n    created_at: datetime = field(converter=to_datetime)\n\n\n@frozen(kw_only=True)\nclass URLDatasetLock(DatasetSpec):\n    meta: Meta = field(converter=ensure(Meta))  # type: ignore[misc]\n    files: list[FileInfo] = field(\n        factory=list,\n        converter=lambda f: sorted(map(ensure(FileInfo), f)),\n        metadata={\"exclude_falsy\": True},\n    )\n\n\ndef to_spec(lock: \"Lock\") -> \"Spec\":\n    cls = DVCDatasetSpec if lock.type == \"dvc\" else DatasetSpec\n    return cls(**{f.name: getattr(lock, f.name) for f in fields(cls)})\n\n\n@frozen(kw_only=True)\nclass DVCDataset:\n    manifest_path: str\n    spec: DVCDatasetSpec\n    lock: Optional[DVCDatasetLock] = None\n    _invalidated: bool = field(default=False, eq=False, repr=False)\n\n    type: 
ClassVar[Literal[\"dvc\"]] = \"dvc\"\n\n    def update(self, repo, rev: Optional[str] = None, **kwargs) -> \"Self\":\n        from dvc.dependency import RepoDependency\n\n        spec = self.spec\n        if rev:\n            spec = evolve(self.spec, rev=rev)\n\n        def_repo = {\n            RepoDependency.PARAM_REV: spec.rev,\n            RepoDependency.PARAM_URL: spec.url,\n        }\n        dep = RepoDependency(def_repo, None, spec.path, repo=repo)  # type: ignore[arg-type]\n        dep.save()\n        d = dep.dumpd()\n\n        repo_info = d[RepoDependency.PARAM_REPO]\n        assert isinstance(repo_info, dict)\n        rev_lock = repo_info[RepoDependency.PARAM_REV_LOCK]\n        lock = DVCDatasetLock(**spec.to_dict(), rev_lock=rev_lock)\n        return evolve(self, spec=spec, lock=lock)\n\n\n@frozen(kw_only=True)\nclass DatachainDataset:\n    manifest_path: str\n    spec: \"DatasetSpec\"\n    lock: \"Optional[DatachainDatasetLock]\" = field(default=None)\n    _invalidated: bool = field(default=False, eq=False, repr=False)\n\n    type: ClassVar[Literal[\"dc\"]] = \"dc\"\n\n    @property\n    def pinned(self) -> bool:\n        return self.name_version[1] is not None\n\n    @property\n    def name_version(self) -> tuple[str, Optional[int]]:\n        url = urlparse(self.spec.url)\n        path = url.netloc + url.path\n        parts = path.split(\"@v\")\n        assert parts\n\n        name = parts[0]\n        version = int(parts[1]) if len(parts) > 1 else None\n        return name, version\n\n    def update(\n        self,\n        repo,  # noqa: ARG002\n        record: Optional[\"DatasetRecord\"] = None,\n        version: Optional[int] = None,\n        **kwargs,\n    ) -> \"Self\":\n        name, _version = self.name_version\n        version = version if version is not None else _version\n        version_info = _get_dataset_info(name, record=record, version=version)\n        lock = DatachainDatasetLock(\n            **self.spec.to_dict(),\n            
version=version_info.version,\n            created_at=version_info.created_at,\n        )\n        return evolve(self, lock=lock)\n\n\n@frozen(kw_only=True)\nclass URLDataset:\n    manifest_path: str\n    spec: \"DatasetSpec\"\n    lock: \"Optional[URLDatasetLock]\" = None\n    _invalidated: bool = field(default=False, eq=False, repr=False)\n\n    type: ClassVar[Literal[\"url\"]] = \"url\"\n\n    def update(self, repo, **kwargs):\n        from dvc.dependency import Dependency\n\n        dep = Dependency(\n            None, self.spec.url, repo=repo, fs_config={\"version_aware\": True}\n        )\n        dep.save()\n        d = dep.dumpd(datasets=True)\n        files = [\n            FileInfo(relpath=info[\"relpath\"], meta=Meta.from_dict(info))\n            for info in d.get(\"files\", [])\n        ]\n        lock = URLDatasetLock(**self.spec.to_dict(), meta=dep.meta, files=files)\n        return evolve(self, lock=lock)\n\n\nLock = Union[DVCDatasetLock, DatachainDatasetLock, URLDatasetLock]\nSpec = Union[DatasetSpec, DVCDatasetSpec]\nDataset = Union[DVCDataset, DatachainDataset, URLDataset]\n\n\nclass DatasetNotFoundError(DvcException, KeyError):\n    def __init__(self, name, *args):\n        self.name = name\n        super().__init__(\"dataset not found\", *args)\n\n    def __str__(self) -> str:\n        return self.msg\n\n\nclass Datasets(Mapping[str, Dataset]):\n    def __init__(self, repo: \"Repo\") -> None:\n        self.repo: Repo = repo\n\n    def __repr__(self):\n        return repr(dict(self))\n\n    def __rich_repr__(self):\n        yield dict(self)\n\n    def __getitem__(self, name: str) -> Dataset:\n        try:\n            return self._datasets[name]\n        except KeyError as exc:\n            raise DatasetNotFoundError(name) from exc\n\n    def __setitem__(self, name: str, dataset: Dataset) -> None:\n        self._datasets[name] = dataset\n\n    def __contains__(self, name: object) -> bool:\n        return name in self._datasets\n\n    def 
__iter__(self) -> Iterator[str]:\n        return iter(self._datasets)\n\n    def __len__(self) -> int:\n        return len(self._datasets)\n\n    @cached_property\n    def _spec(self) -> dict[str, tuple[str, dict[str, Any]]]:\n        return {\n            dataset[\"name\"]: (path, dataset)\n            for path, datasets in self.repo.index._datasets.items()\n            for dataset in datasets\n        }\n\n    @cached_property\n    def _lock(self) -> dict[str, Optional[dict[str, Any]]]:\n        datasets_lock = self.repo.index._datasets_lock\n\n        def find(path, name) -> Optional[dict[str, Any]]:\n            # only look for `name` in the lock file next to the\n            # corresponding `dvc.yaml` file\n            lock = datasets_lock.get(path, [])\n            return next((dataset for dataset in lock if dataset[\"name\"] == name), None)\n\n        return {ds[\"name\"]: find(path, name) for name, (path, ds) in self._spec.items()}\n\n    @cached_property\n    def _datasets(self) -> dict[str, Dataset]:\n        return {\n            name: self._build_dataset(path, spec, self._lock[name])\n            for name, (path, spec) in self._spec.items()\n        }\n\n    def _reset(self) -> None:\n        self.__dict__.pop(\"_spec\", None)\n        self.__dict__.pop(\"_lock\", None)\n        self.__dict__.pop(\"_datasets\", None)\n\n    @staticmethod\n    def _spec_from_info(spec: dict[str, Any]) -> Spec:\n        typ = spec.get(\"type\")\n        if not typ:\n            raise ValueError(\"type should be present in spec\")\n        if typ == \"dvc\":\n            return DVCDatasetSpec.from_dict(spec)\n        if typ in {\"dc\", \"url\"}:\n            return DatasetSpec.from_dict(spec)\n        raise ValueError(f\"unknown dataset type: {spec.get('type', '')}\")\n\n    @staticmethod\n    def _lock_from_info(lock: Optional[dict[str, Any]]) -> Optional[Lock]:\n        kl = {\"dvc\": DVCDatasetLock, \"dc\": DatachainDatasetLock, \"url\": URLDatasetLock}\n        if lock 
and (cls := kl.get(lock.get(\"type\", \"\"))):  # type: ignore[assignment]\n            return cls.from_dict(lock)  # type: ignore[attr-defined]\n        return None\n\n    @classmethod\n    def _build_dataset(\n        cls,\n        manifest_path: str,\n        spec_data: dict[str, Any],\n        lock_data: Optional[dict[str, Any]] = None,\n    ) -> Dataset:\n        _invalidated = False\n        spec = cls._spec_from_info(spec_data)\n        lock = cls._lock_from_info(lock_data)\n        # if dvc.lock and dvc.yaml file are not in sync, we invalidate the lock.\n        if lock is not None and to_spec(lock) != spec:\n            logger.debug(\n                \"invalidated lock data for %s in %s\",\n                spec.name,\n                manifest_path,\n            )\n            _invalidated = True  # signal is used during `dvc repro`/`dvc status`.\n            lock = None\n\n        assert isinstance(spec, DatasetSpec)\n        if spec.type == \"dvc\":\n            assert lock is None or isinstance(lock, DVCDatasetLock)\n            assert isinstance(spec, DVCDatasetSpec)\n            return DVCDataset(\n                manifest_path=manifest_path,\n                spec=spec,\n                lock=lock,\n                invalidated=_invalidated,\n            )\n        if spec.type == \"url\":\n            assert lock is None or isinstance(lock, URLDatasetLock)\n            return URLDataset(\n                manifest_path=manifest_path,\n                spec=spec,\n                lock=lock,\n                invalidated=_invalidated,\n            )\n        if spec.type == \"dc\":\n            assert lock is None or isinstance(lock, DatachainDatasetLock)\n            return DatachainDataset(\n                manifest_path=manifest_path,\n                spec=spec,\n                lock=lock,\n                invalidated=_invalidated,\n            )\n        raise ValueError(f\"unknown dataset type: {spec.type!r}\")\n\n    def add(\n        self,\n        
name: str,\n        url: str,\n        type: str,  # noqa: A002\n        manifest_path: StrPath = \"dvc.yaml\",\n        **kwargs: Any,\n    ) -> Dataset:\n        assert type in {\"dvc\", \"dc\", \"url\"}\n        kwargs.update({\"name\": name, \"url\": url, \"type\": type})\n        dataset = self._build_dataset(os.path.abspath(manifest_path), kwargs)\n        dataset = dataset.update(self.repo)\n\n        self.dump(dataset)\n        self[name] = dataset\n        return dataset\n\n    def update(self, name, **kwargs) -> tuple[Dataset, Dataset]:\n        dataset = self[name]\n        version = kwargs.get(\"version\")\n\n        if dataset.type == \"url\" and (version or kwargs.get(\"rev\")):\n            raise ValueError(\"cannot update version/revision for a url\")\n        if dataset.type == \"dc\" and version is not None:\n            if not isinstance(version, int):\n                raise TypeError(\n                    \"DataChain dataset version has to be an integer, \"\n                    f\"got {type(version).__name__!r}\"\n                )\n            if version < 1:\n                raise ValueError(\n                    f\"DataChain dataset version should be >=1, got {version}\"\n                )\n\n        new = dataset.update(self.repo, **kwargs)\n\n        self.dump(new, old=dataset)\n        self[name] = new\n        return dataset, new\n\n    def _dump_spec(self, manifest_path: StrPath, spec: Spec) -> None:\n        spec_data = spec.to_dict()\n        assert spec_data.keys() & {\"type\", \"name\", \"url\"}\n        project_file = ProjectFile(self.repo, manifest_path)\n        project_file.dump_dataset(spec_data)\n\n    def _dump_lock(self, manifest_path: StrPath, lock: Lock) -> None:\n        lock_data = lock.to_dict()\n        assert lock_data.keys() & {\"type\", \"name\", \"url\"}\n        lockfile = Lockfile(self.repo, Path(manifest_path).with_suffix(\".lock\"))\n        lockfile.dump_dataset(lock_data)\n\n    def dump(self, dataset: 
Dataset, old: Optional[Dataset] = None) -> None:\n        if not old or old.spec != dataset.spec:\n            self._dump_spec(dataset.manifest_path, dataset.spec)\n        if dataset.lock and (not old or old.lock != dataset.lock):\n            self._dump_lock(dataset.manifest_path, dataset.lock)\n"
  },
  {
    "path": "dvc/repo/destroy.py",
    "content": "from dvc.ignore import destroy as destroy_dvcignore\nfrom dvc.utils.fs import remove\n\nfrom . import locked\n\n\n@locked\ndef _destroy_stages(repo):\n    for stage in repo.index.stages:\n        stage.unprotect_outs()\n        stage.dvcfile.remove(force=True)\n\n\n# NOTE: not locking `destroy`, as `remove` will need to delete `.dvc` dir,\n# which will cause issues on Windows, as `.dvc/lock` will be busy.\ndef destroy(repo):\n    _destroy_stages(repo)\n    repo.close()\n    destroy_dvcignore(repo.root_dir)\n    remove(repo.dvc_dir)\n"
  },
  {
    "path": "dvc/repo/diff.py",
    "content": "import errno\nimport os\nfrom collections import defaultdict\nfrom typing import Optional\n\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.ui import ui\n\nlogger = logger.getChild(__name__)\n\n\ndef _path(entry):\n    if entry and entry.meta and entry.meta.isdir:\n        return os.path.join(*entry.key, \"\")\n    return os.path.join(*entry.key)\n\n\ndef _hash(entry):\n    if entry and entry.hash_info:\n        return entry.hash_info.value\n    return None\n\n\ndef _diff(old, new, data_keys, with_missing=False):\n    from dvc_data.index.diff import ADD, DELETE, MODIFY, RENAME\n    from dvc_data.index.diff import diff as idiff\n\n    ret: dict[str, list[dict]] = {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [],\n        \"not in cache\": [],\n    }\n\n    def meta_cmp_key(meta):\n        if not meta:\n            return meta\n        return meta.isdir\n\n    for change in idiff(\n        old,\n        new,\n        with_renames=True,\n        meta_cmp_key=meta_cmp_key,\n        roots=data_keys,\n        # Include unknown entries from missing dir entry, so that they don't\n        # get reported as added/modified/deleted.\n        # Also return unchanged entries so that we can check if they are missing\n        # from cache.\n        with_unknown=True,\n        with_unchanged=with_missing,\n    ):\n        if (change.old and change.old.isdir and not change.old.hash_info) or (\n            change.new and change.new.isdir and not change.new.hash_info\n        ):\n            continue\n\n        if change.typ == ADD:\n            ret[\"added\"].append({\"path\": _path(change.new), \"hash\": _hash(change.new)})\n        elif change.typ == DELETE:\n            ret[\"deleted\"].append(\n                {\"path\": _path(change.old), \"hash\": _hash(change.old)}\n            )\n        elif change.typ == MODIFY:\n            ret[\"modified\"].append(\n                {\n                
    \"path\": _path(change.old),\n                    \"hash\": {\"old\": _hash(change.old), \"new\": _hash(change.new)},\n                }\n            )\n        elif change.typ == RENAME:\n            ret[\"renamed\"].append(\n                {\n                    \"path\": {\"old\": _path(change.old), \"new\": _path(change.new)},\n                    \"hash\": _hash(change.old),\n                }\n            )\n\n        if (\n            with_missing\n            and change.old\n            and change.old.hash_info\n            and not old.storage_map.cache_exists(change.old)\n        ):\n            ret[\"not in cache\"].append(\n                {\"path\": _path(change.old), \"hash\": _hash(change.old)}\n            )\n\n    return ret if any(ret.values()) else {}\n\n\n@locked\ndef diff(\n    self,\n    a_rev: str = \"HEAD\",\n    b_rev: Optional[str] = None,\n    targets: Optional[list[str]] = None,\n    recursive: bool = False,\n):\n    \"\"\"\n    By default, it compares the workspace with the last commit's fs.\n\n    This implementation differs from `git diff` since DVC doesn't have\n    the concept of `index`, but it keeps the same interface, thus,\n    `dvc diff` would be the same as `dvc diff HEAD`.\n    \"\"\"\n    if self.scm.no_commits:\n        return {}\n\n    indexes = {}\n    missing_targets = defaultdict(set)\n    with_missing = False\n    if not b_rev:\n        b_rev = \"workspace\"\n        with_missing = True\n\n    data_keys = set()\n    for rev in self.brancher(revs=[a_rev, b_rev]):\n        if rev == \"workspace\" and b_rev != \"workspace\":\n            # brancher always returns workspace, but we only need to compute\n            # workspace paths/checksums if b_rev was None\n            continue\n\n        def onerror(target, _exc):\n            missing_targets[rev].add(target)  # noqa: B023\n\n        view = self.index.targets_view(targets, onerror=onerror, recursive=recursive)\n\n        
data_keys.update(view.data_keys.get(\"repo\", set()))\n\n        if rev == \"workspace\":\n            from .index import build_data_index\n\n            with ui.status(\"Building workspace index\"):\n                data = build_data_index(view, self.root_dir, self.fs, compute_hash=True)\n        else:\n            data = view.data[\"repo\"]\n\n        assert rev not in indexes\n        indexes[rev] = data\n\n    if targets:\n        old_missing = missing_targets.get(a_rev, set())\n        new_missing = missing_targets.get(b_rev, set())\n\n        # check for overlapping missing targets between a_rev and b_rev\n        for target in old_missing & new_missing:\n            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), target)\n\n    if len(indexes.keys()) == 1:\n        # both a_rev and b_rev point to the same sha, nothing to compare\n        old = None\n        new = None\n    else:\n        old = indexes[a_rev]\n        new = indexes[b_rev]\n\n    with ui.status(\"Calculating diff\"):\n        return _diff(old, new, data_keys, with_missing=with_missing)\n"
  },
  {
    "path": "dvc/repo/du.py",
    "content": "from typing import Any, Optional, Union\n\n\ndef du(\n    url: str,\n    path: Optional[str] = None,\n    rev: Optional[str] = None,\n    summarize: bool = False,\n    config: Union[dict[str, Any], str, None] = None,\n    remote: Optional[str] = None,\n    remote_config: Optional[dict] = None,\n):\n    from dvc.config import Config\n\n    from . import Repo\n\n    if config and not isinstance(config, dict):\n        config_dict = Config.load_file(config)\n    else:\n        config_dict = None\n\n    with Repo.open(\n        url,\n        rev=rev,\n        subrepos=True,\n        uninitialized=True,\n        config=config_dict,\n        remote=remote,\n        remote_config=remote_config,\n    ) as repo:\n        path = path or \"\"\n\n        fs = repo.dvcfs\n\n        if summarize or not fs.isdir(path):\n            return [(path, fs.du(path, total=True))]\n\n        ret = [\n            (entry_path, fs.du(entry_path, total=True)) for entry_path in fs.ls(path)\n        ]\n        ret.append((path, sum(entry[1] for entry in ret)))\n        return ret\n"
  },
  {
    "path": "dvc/repo/experiments/__init__.py",
    "content": "import os\nimport re\nfrom collections.abc import Iterable\nfrom typing import TYPE_CHECKING, Optional\n\nfrom funcy import chain, first\n\nfrom dvc.log import logger\nfrom dvc.ui import ui\nfrom dvc.utils import relpath\nfrom dvc.utils.objects import cached_property\n\nfrom .cache import ExpCache\nfrom .exceptions import (\n    BaselineMismatchError,\n    ExperimentExistsError,\n    InvalidExpRefError,\n    MultipleBranchError,\n)\nfrom .refs import (\n    APPLY_STASH,\n    CELERY_FAILED_STASH,\n    CELERY_STASH,\n    EXEC_APPLY,\n    EXEC_NAMESPACE,\n    EXPS_NAMESPACE,\n    WORKSPACE_STASH,\n    ExpRefInfo,\n)\nfrom .stash import ApplyStash\nfrom .utils import check_ref_format, exp_refs_by_rev, unlocked_repo\n\nif TYPE_CHECKING:\n    from .queue.base import BaseStashQueue, QueueEntry\n    from .queue.celery import LocalCeleryQueue\n    from .queue.tempdir import TempDirQueue\n    from .queue.workspace import WorkspaceQueue\n    from .stash import ExpStashEntry\n\nlogger = logger.getChild(__name__)\n\n\nclass Experiments:\n    \"\"\"Class that manages experiments in a DVC repo.\n\n    Args:\n        repo (dvc.repo.Repo): repo instance that these experiments belong to.\n    \"\"\"\n\n    BRANCH_RE = re.compile(r\"^(?P<baseline_rev>[a-f0-9]{7})-(?P<exp_sha>[a-f0-9]+)\")\n\n    def __init__(self, repo):\n        from dvc.scm import NoSCMError\n\n        if repo.config[\"core\"].get(\"no_scm\", False):\n            raise NoSCMError\n\n        self.repo = repo\n\n    @property\n    def scm(self):\n        from dvc.scm import SCMError\n\n        if self.repo.scm.no_commits:\n            raise SCMError(\"Empty Git repo. 
Add a commit to use experiments.\")\n\n        return self.repo.scm\n\n    @cached_property\n    def dvc_dir(self) -> str:\n        return relpath(self.repo.dvc_dir, self.repo.scm.root_dir)\n\n    @cached_property\n    def args_file(self) -> str:\n        from .executor.base import BaseExecutor\n\n        return os.path.join(self.repo.tmp_dir, BaseExecutor.PACKED_ARGS_FILE)\n\n    @cached_property\n    def workspace_queue(self) -> \"WorkspaceQueue\":\n        from .queue.workspace import WorkspaceQueue\n\n        return WorkspaceQueue(self.repo, WORKSPACE_STASH)\n\n    @cached_property\n    def tempdir_queue(self) -> \"TempDirQueue\":\n        from .queue.tempdir import TempDirQueue\n\n        # NOTE: tempdir and workspace stash is shared since both\n        # implementations immediately push -> pop (queue length is only 0 or 1)\n        return TempDirQueue(self.repo, WORKSPACE_STASH)\n\n    @cached_property\n    def celery_queue(self) -> \"LocalCeleryQueue\":\n        from .queue.celery import LocalCeleryQueue\n\n        return LocalCeleryQueue(self.repo, CELERY_STASH, CELERY_FAILED_STASH)\n\n    @cached_property\n    def apply_stash(self) -> ApplyStash:\n        return ApplyStash(self.scm, APPLY_STASH)\n\n    @cached_property\n    def cache(self) -> ExpCache:\n        return ExpCache(self.repo)\n\n    @property\n    def stash_revs(self) -> dict[str, \"ExpStashEntry\"]:\n        revs = {}\n        for queue in (self.workspace_queue, self.celery_queue):\n            revs.update(queue.stash.stash_revs)\n        return revs\n\n    def reproduce_one(\n        self,\n        tmp_dir: bool = False,\n        copy_paths: Optional[list[str]] = None,\n        message: Optional[str] = None,\n        **kwargs,\n    ):\n        \"\"\"Reproduce and checkout a single (standalone) experiment.\"\"\"\n        exp_queue: BaseStashQueue = (\n            self.tempdir_queue if tmp_dir else self.workspace_queue\n        )\n        self.queue_one(exp_queue, **kwargs)\n        results = 
self._reproduce_queue(\n            exp_queue, copy_paths=copy_paths, message=message\n        )\n        exp_rev = first(results)\n        if exp_rev is not None:\n            self._log_reproduced(results, tmp_dir=tmp_dir)\n        return results\n\n    def queue_one(self, queue: \"BaseStashQueue\", **kwargs) -> \"QueueEntry\":\n        \"\"\"Queue a single experiment.\"\"\"\n        return self.new(queue, **kwargs)\n\n    def reproduce_celery(\n        self, entries: Optional[Iterable[\"QueueEntry\"]] = None, **kwargs\n    ) -> dict[str, str]:\n        results: dict[str, str] = {}\n        if entries is None:\n            entries = list(\n                chain(self.celery_queue.iter_active(), self.celery_queue.iter_queued())\n            )\n\n        logger.debug(\"reproduce all these entries '%s'\", entries)\n\n        if not entries:\n            return results\n\n        self.celery_queue.start_workers(count=kwargs.get(\"jobs\", 1))\n        failed = []\n        try:\n            ui.write(\n                \"Following logs for all queued experiments. 
Use Ctrl+C to \"\n                \"stop following logs (experiment execution will continue).\\n\"\n            )\n            for entry in entries:\n                # wait for task execution to start\n                self.celery_queue.wait_for_start(entry, sleep_interval=1)\n                self.celery_queue.follow(entry)\n                # wait for task collection to complete\n                try:\n                    result = self.celery_queue.get_result(entry)\n                except FileNotFoundError:\n                    result = None\n                if result is None or result.exp_hash is None:\n                    name = entry.name or entry.stash_rev[:7]\n                    failed.append(name)\n                elif result.ref_info:\n                    exp_rev = self.scm.get_ref(str(result.ref_info))\n                    results[exp_rev] = result.exp_hash\n        except KeyboardInterrupt:\n            ui.write(\n                \"Experiment(s) are still executing in the background. 
To \"\n                \"abort execution use 'dvc queue kill' or 'dvc queue stop'.\"\n            )\n        if failed:\n            names = \", \".join(name for name in failed)\n            ui.error(f\"Failed to reproduce experiment(s) '{names}'\")\n        if results:\n            self._log_reproduced((rev for rev in results), True)\n        return results\n\n    def _log_reproduced(self, revs: Iterable[str], tmp_dir: bool = False):\n        names = []\n        rev_names = self.get_exact_name(revs)\n        for rev in revs:\n            name = rev_names[rev]\n            names.append(name if name else rev[:7])\n        ui.write(\"\\nRan experiment(s): {}\".format(\", \".join(names)))\n        if tmp_dir:\n            ui.write(\n                \"To apply the results of an experiment to your workspace \"\n                \"run:\\n\\n\"\n                \"\\tdvc exp apply <exp>\"\n            )\n        else:\n            ui.write(\"Experiment results have been applied to your workspace.\")\n\n    def new(self, queue: \"BaseStashQueue\", *args, **kwargs) -> \"QueueEntry\":\n        \"\"\"Create and enqueue a new experiment.\n\n        Experiment will be derived from the current workspace.\n        \"\"\"\n\n        name = kwargs.get(\"name\")\n        baseline_sha = kwargs.get(\"baseline_rev\") or self.repo.scm.get_rev()\n\n        if name:\n            exp_ref = ExpRefInfo(baseline_sha=baseline_sha, name=name)\n            check_ref_format(self.scm, exp_ref)\n            force = kwargs.get(\"force\", False)\n            if self.scm.get_ref(str(exp_ref)) and not force:\n                raise ExperimentExistsError(exp_ref.name)\n\n        return queue.put(*args, **kwargs)\n\n    def _get_last_applied(self) -> Optional[str]:\n        try:\n            last_applied = self.scm.get_ref(EXEC_APPLY)\n            if last_applied:\n                self.check_baseline(last_applied)\n            return last_applied\n        except BaselineMismatchError:\n            # If HEAD 
has moved since the last applied experiment,\n            # the applied experiment is no longer relevant\n            self.scm.remove_ref(EXEC_APPLY)\n        return None\n\n    @unlocked_repo\n    def _reproduce_queue(\n        self,\n        queue: \"BaseStashQueue\",\n        copy_paths: Optional[list[str]] = None,\n        message: Optional[str] = None,\n        **kwargs,\n    ) -> dict[str, str]:\n        \"\"\"Reproduce queued experiments.\n\n        Arguments:\n            queue: Experiment queue.\n\n        Returns:\n            dict mapping successfully reproduced experiment revs to their\n            results.\n        \"\"\"\n        exec_results = queue.reproduce(copy_paths=copy_paths, message=message)\n\n        results: dict[str, str] = {}\n        for exp_result in exec_results.values():\n            results.update(exp_result)\n        return results\n\n    def check_baseline(self, exp_rev):\n        baseline_sha = self.repo.scm.get_rev()\n        if exp_rev == baseline_sha:\n            return exp_rev\n\n        exp_baseline = self._get_baseline(exp_rev)\n        if exp_baseline is None:\n            # if we can't tell from branch name, fall back to parent commit\n            exp_commit = self.scm.resolve_commit(exp_rev)\n            if exp_commit:\n                exp_baseline = first(exp_commit.parents)\n        if exp_baseline == baseline_sha:\n            return exp_baseline\n        raise BaselineMismatchError(exp_baseline, baseline_sha)\n\n    def get_baseline(self, rev):\n        \"\"\"Return the baseline rev for an experiment rev.\"\"\"\n        return self._get_baseline(rev)\n\n    def _get_baseline(self, rev):\n        from dvc.scm import resolve_rev\n\n        rev = resolve_rev(self.scm, rev)\n\n        if rev in self.stash_revs:\n            entry = self.stash_revs.get(rev)\n            if entry:\n                return entry.baseline_rev\n            return None\n\n        ref_info = first(exp_refs_by_rev(self.scm, rev))\n        if 
ref_info:\n            return ref_info.baseline_sha\n        return None\n\n    def get_branch_by_rev(\n        self, rev: str, allow_multiple: bool = False\n    ) -> Optional[str]:\n        \"\"\"Returns full refname for the experiment branch containing rev.\"\"\"\n        ref_infos = list(exp_refs_by_rev(self.scm, rev))\n        if not ref_infos:\n            return None\n        if len(ref_infos) > 1 and not allow_multiple:\n            for ref_info in ref_infos:\n                if self.scm.get_ref(str(ref_info)) == rev:\n                    return str(ref_info)\n            raise MultipleBranchError(rev, ref_infos)\n        return str(ref_infos[0])\n\n    def get_exact_name(self, revs: Iterable[str]) -> dict[str, Optional[str]]:\n        \"\"\"Returns preferred name for the specified revision.\n\n        Prefers tags, branches (heads), experiments in that order.\n        \"\"\"\n        result: dict[str, Optional[str]] = {}\n        exclude = f\"{EXEC_NAMESPACE}/*\"\n        ref_dict = self.scm.describe(revs, base=EXPS_NAMESPACE, exclude=exclude)\n        for rev in revs:\n            name: Optional[str] = None\n            ref = ref_dict[rev]\n            if ref:\n                try:\n                    name = ExpRefInfo.from_ref(ref).name\n                except InvalidExpRefError:\n                    pass\n            if not name:\n                if rev in self.stash_revs:\n                    name = self.stash_revs[rev].name\n                else:\n                    failed_stash = self.celery_queue.failed_stash\n                    if failed_stash and rev in failed_stash.stash_revs:\n                        name = failed_stash.stash_revs[rev].name\n            result[rev] = name\n        return result\n\n    def apply(self, *args, **kwargs):\n        from dvc.repo.experiments.apply import apply\n\n        return apply(self.repo, *args, **kwargs)\n\n    def branch(self, *args, **kwargs):\n        from dvc.repo.experiments.branch import branch\n\n      
  return branch(self.repo, *args, **kwargs)\n\n    def diff(self, *args, **kwargs):\n        from dvc.repo.experiments.diff import diff\n\n        return diff(self.repo, *args, **kwargs)\n\n    def show(self, *args, **kwargs):\n        from dvc.repo.experiments.show import show\n\n        return show(self.repo, *args, **kwargs)\n\n    def run(self, *args, **kwargs):\n        from dvc.repo.experiments.run import run\n\n        return run(self.repo, *args, **kwargs)\n\n    def save(self, *args, **kwargs):\n        from dvc.repo.experiments.save import save\n\n        return save(self.repo, *args, **kwargs)\n\n    def push(self, *args, **kwargs):\n        from dvc.repo.experiments.push import push\n\n        return push(self.repo, *args, **kwargs)\n\n    def pull(self, *args, **kwargs):\n        from dvc.repo.experiments.pull import pull\n\n        return pull(self.repo, *args, **kwargs)\n\n    def ls(self, *args, **kwargs):\n        from dvc.repo.experiments.ls import ls\n\n        return ls(self.repo, *args, **kwargs)\n\n    def remove(self, *args, **kwargs):\n        from dvc.repo.experiments.remove import remove\n\n        return remove(self.repo, *args, **kwargs)\n\n    def rename(self, *args, **kwargs):\n        from dvc.repo.experiments.rename import rename\n\n        return rename(self.repo, *args, **kwargs)\n\n    def clean(self, *args, **kwargs):\n        from dvc.repo.experiments.clean import clean\n\n        return clean(self.repo, *args, **kwargs)\n"
  },
  {
    "path": "dvc/repo/experiments/apply.py",
    "content": "import os\nfrom typing import TYPE_CHECKING, Optional\n\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.scm import Git\nfrom dvc.ui import ui\nfrom dvc.utils.fs import remove\n\nfrom .exceptions import BaselineMismatchError, InvalidExpRevError\nfrom .executor.base import BaseExecutor\nfrom .refs import EXEC_APPLY\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.repo.experiments import Experiments\n\nlogger = logger.getChild(__name__)\n\n\n@locked\n@scm_context\ndef apply(repo: \"Repo\", rev: str, **kwargs):\n    from dvc.repo.checkout import checkout as dvc_checkout\n    from dvc.scm import RevError, resolve_rev\n\n    exps: Experiments = repo.experiments\n\n    is_stash: bool = False\n\n    assert isinstance(repo.scm, Git)\n    try:\n        exp_rev = resolve_rev(repo.scm, rev)\n    except RevError as exc:\n        (exp_ref_info, queue_entry) = exps.celery_queue.get_ref_and_entry_by_names(rev)[\n            rev\n        ]\n        if exp_ref_info:\n            exp_rev = repo.scm.get_ref(str(exp_ref_info))\n        elif queue_entry:\n            exp_rev = queue_entry.stash_rev\n            is_stash = True\n        else:\n            raise InvalidExpRevError(rev) from exc\n    except BaselineMismatchError as exc:\n        raise InvalidExpRevError(rev) from exc\n\n    _apply(repo, exp_rev, name=rev, is_stash=is_stash)\n    kwargs[\"force\"] = True\n    dvc_checkout(repo, **kwargs)\n\n    repo.scm.set_ref(EXEC_APPLY, exp_rev)\n    ui.write(\n        f\"Changes for experiment '{rev}' have been applied to your current workspace.\",\n    )\n\n\ndef _apply(repo: \"Repo\", rev: str, name: Optional[str] = None, is_stash: bool = False):\n    exps: Experiments = repo.experiments\n\n    with exps.apply_stash.preserve_workspace(rev, name=name):\n        with repo.scm.detach_head(rev, force=True):\n            if is_stash:\n                assert repo.tmp_dir is not None\n           
     args_path = os.path.join(repo.tmp_dir, BaseExecutor.PACKED_ARGS_FILE)\n                if os.path.exists(args_path):\n                    remove(args_path)\n"
  },
  {
    "path": "dvc/repo/experiments/branch.py",
    "content": "from dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.scm import RevError\n\nfrom .exceptions import InvalidExpRevError\nfrom .utils import exp_refs_by_rev\n\nlogger = logger.getChild(__name__)\n\n\n@locked\n@scm_context\ndef branch(repo, exp_rev, branch_name=None, **kwargs):\n    from dvc.scm import resolve_rev\n\n    try:\n        rev = resolve_rev(repo.scm, exp_rev)\n    except RevError:\n        raise InvalidArgumentError(exp_rev)  # noqa: B904\n    ref_info = None\n\n    ref_infos = list(exp_refs_by_rev(repo.scm, rev))\n    if len(ref_infos) == 1:\n        ref_info = ref_infos[0]\n    elif len(ref_infos) > 1:\n        current_rev = repo.scm.get_rev()\n        for info in ref_infos:\n            if info.baseline_sha == current_rev:\n                ref_info = info\n                break\n        if not ref_info:\n            msg = [\n                (\n                    f\"Ambiguous experiment name '{exp_rev}' can refer to \"\n                    \"multiple experiments. 
To create a branch use a full \"\n                    \"experiment ref:\"\n                ),\n                \"\",\n            ]\n            msg.extend([str(info) for info in ref_infos])\n            raise InvalidArgumentError(\"\\n\".join(msg))\n\n    if not ref_info:\n        raise InvalidExpRevError(exp_rev)\n\n    branch_name = branch_name or f\"{ref_info.name}-branch\"\n\n    branch_ref = f\"refs/heads/{branch_name}\"\n    if repo.scm.get_ref(branch_ref):\n        raise InvalidArgumentError(f\"Git branch '{branch_name}' already exists.\")\n\n    target = repo.scm.get_ref(str(ref_info))\n    repo.scm.set_ref(\n        branch_ref,\n        target,\n        message=f\"dvc: Created from experiment '{ref_info.name}'\",\n    )\n    fmt = (\n        \"Git branch '%s' has been created from experiment '%s'.\\n\"\n        \"To switch to the new branch run:\\n\\n\"\n        \"\\tgit checkout %s\"\n    )\n    logger.info(fmt, branch_name, ref_info.name, branch_name)\n"
  },
  {
    "path": "dvc/repo/experiments/brancher.py",
    "content": "from collections.abc import Iterator\nfrom contextlib import ExitStack, contextmanager\nfrom typing import TYPE_CHECKING\n\nfrom dvc.repo.experiments.exceptions import InvalidExpRevError\nfrom dvc.scm import RevError\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\n\n@contextmanager\ndef switch_repo(repo: \"Repo\", rev: str) -> Iterator[tuple[\"Repo\", str]]:\n    \"\"\"Return a repo instance (brancher) switched to rev.\n\n    If rev is the name of a running experiment, the returned instance will be\n    the live repo wherever the experiment is running.\n\n    NOTE: This will not resolve git SHA's that only exist in queued exp workspaces\n    (it will only match queued exp names).\n    \"\"\"\n    try:\n        with repo.switch(rev):\n            yield repo, rev\n        return\n    except RevError as exc:\n        orig_exc = exc\n    exps = repo.experiments\n\n    if rev == exps.workspace_queue.get_running_exp():\n        yield repo, \"workspace\"\n        return\n\n    for queue in (exps.tempdir_queue, exps.celery_queue):\n        try:\n            active_repo = queue.active_repo(rev)\n        except InvalidExpRevError:\n            continue\n        stack = ExitStack()\n        stack.enter_context(active_repo)\n        stack.enter_context(active_repo.switch(\"workspace\"))\n        with stack:\n            yield active_repo, rev\n        return\n    raise orig_exc\n"
  },
  {
    "path": "dvc/repo/experiments/cache.py",
    "content": "import os\nfrom typing import TYPE_CHECKING, Optional, Union\n\nfrom dvc.fs import localfs\nfrom dvc.log import logger\nfrom dvc_objects.db import ObjectDB\n\nfrom .serialize import DeserializeError, SerializableError, SerializableExp\nfrom .utils import EXEC_TMP_DIR\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\nlogger = logger.getChild(__name__)\n\n\nclass ExpCache:\n    \"\"\"Serialized experiment state cache.\n\n    ODB with git SHAs as keys. Objects can be either SerializableExp or\n    SerializableError.\n    \"\"\"\n\n    CACHE_DIR = os.path.join(EXEC_TMP_DIR, \"cache\")\n\n    def __init__(self, repo: \"Repo\"):\n        path = os.path.join(repo.tmp_dir, self.CACHE_DIR)\n        self.odb = ObjectDB(localfs, path)\n\n    def delete(self, rev: str):\n        self.odb.delete(rev)\n\n    def put(\n        self,\n        exp: Union[SerializableExp, SerializableError],\n        rev: Optional[str] = None,\n        force: bool = False,\n    ):\n        rev = rev or getattr(exp, \"rev\", None)\n        assert rev\n        assert rev != \"workspace\"\n        if force or not self.odb.exists(rev):\n            try:\n                self.delete(rev)\n            except FileNotFoundError:\n                pass\n            self.odb.add_bytes(rev, exp.as_bytes())\n            logger.trace(\"ExpCache: cache put '%s'\", rev[:7])\n\n    def get(self, rev: str) -> Optional[Union[SerializableExp, SerializableError]]:\n        obj = self.odb.get(rev)\n        try:\n            with obj.fs.open(obj.path, \"rb\") as fobj:\n                data = fobj.read()\n        except FileNotFoundError:\n            logger.trace(\"ExpCache: cache miss '%s'\", rev[:7])\n            return None\n        for typ in (SerializableExp, SerializableError):\n            try:\n                exp = typ.from_bytes(data)  # type: ignore[attr-defined]\n                logger.trace(\"ExpCache: cache load '%s'\", rev[:7])\n                return exp\n            except 
DeserializeError:\n                continue\n        logger.debug(\"ExpCache: unknown object type for '%s'\", rev)\n        return None\n"
  },
  {
    "path": "dvc/repo/experiments/clean.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom dvc.ui import ui\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\n\ndef clean(repo: \"Repo\"):\n    ui.write(\"Cleaning up dvc-task messages...\")\n    repo.experiments.celery_queue.celery.clean()\n    ui.write(\"Done!\")\n"
  },
  {
    "path": "dvc/repo/experiments/collect.py",
    "content": "import itertools\nimport os\nfrom collections.abc import Collection, Iterable, Iterator\nfrom dataclasses import fields\nfrom datetime import datetime\nfrom typing import TYPE_CHECKING, Optional, Union\n\nfrom funcy import first\nfrom scmrepo.exceptions import SCMError as InnerSCMError\n\nfrom dvc.log import logger\nfrom dvc.scm import Git, SCMError, iter_revs\n\nfrom .exceptions import InvalidExpRefError\nfrom .refs import EXEC_BRANCH, ExpRefInfo\nfrom .serialize import ExpRange, ExpState, SerializableError, SerializableExp\nfrom .utils import describe\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\n    from .cache import ExpCache\n\nlogger = logger.getChild(__name__)\n\n\ndef collect_rev(\n    repo: \"Repo\",\n    rev: str,\n    param_deps: bool = False,\n    force: bool = False,\n    cache: Optional[\"ExpCache\"] = None,\n    **kwargs,\n) -> ExpState:\n    \"\"\"Collect experiment state for the given revision.\n\n    Exp will be loaded from cache when available unless rev is 'workspace' or\n    force is set.\n    \"\"\"\n    from dvc.fs import LocalFileSystem\n\n    cache = cache or repo.experiments.cache\n    assert cache\n    # TODO: support filtering serialized exp when param_deps is set\n    if rev != \"workspace\" and not (force or param_deps):\n        cached_exp = cache.get(rev)\n        if cached_exp:\n            if isinstance(cached_exp, SerializableError):\n                return ExpState(rev=rev, error=cached_exp)\n            return ExpState(rev=rev, data=cached_exp)\n    if rev == \"workspace\" and isinstance(repo.fs, LocalFileSystem):\n        orig_cwd: Optional[str] = os.getcwd()\n        os.chdir(repo.root_dir)\n    else:\n        orig_cwd = None\n    try:\n        data = _collect_rev(repo, rev, param_deps=param_deps, force=force, **kwargs)\n        if not (rev == \"workspace\" or param_deps or data.contains_error):\n            cache.put(data, force=True)\n        return ExpState(rev=rev, data=data)\n    except Exception 
as exc:\n        logger.debug(\"\", exc_info=True)\n        error = SerializableError(str(exc), type(exc).__name__)\n        return ExpState(rev=rev, error=error)\n    finally:\n        if orig_cwd:\n            os.chdir(orig_cwd)\n\n\ndef _collect_rev(\n    repo: \"Repo\",\n    revision: str,\n    param_deps: bool = False,\n    **kwargs,\n) -> SerializableExp:\n    with repo.switch(revision) as rev:\n        if rev == \"workspace\":\n            timestamp: Optional[datetime] = None\n        else:\n            commit = repo.scm.resolve_commit(rev)\n            timestamp = datetime.fromtimestamp(commit.commit_time)  # noqa: DTZ006\n\n        return SerializableExp.from_repo(\n            repo,\n            rev=rev,\n            param_deps=param_deps,\n            timestamp=timestamp,\n        )\n\n\ndef collect_branch(\n    repo: \"Repo\",\n    rev: str,\n    end_rev: Optional[str] = None,\n    **kwargs,\n) -> Iterator[\"ExpState\"]:\n    \"\"\"Iterate over exp states in a Git branch.\n\n    Git branch will be traversed in reverse, starting from rev.\n\n    Args:\n        rev: Branch tip (head).\n        end_rev: If specified, traversal will stop when end_rev is reached\n            (exclusive, end_rev will not be collected).\n    \"\"\"\n    try:\n        for branch_rev in repo.scm.branch_revs(rev, end_rev):\n            yield collect_rev(repo, branch_rev, **kwargs)\n    except (SCMError, InnerSCMError):\n        pass\n\n\ndef collect_exec_branch(\n    repo: \"Repo\",\n    baseline_rev: str,\n    **kwargs,\n) -> Iterator[\"ExpState\"]:\n    \"\"\"Iterate over active experiment branch for the current executor.\"\"\"\n    last_rev = repo.scm.get_ref(EXEC_BRANCH) or repo.scm.get_rev()\n    last_rev = repo.scm.get_rev()\n    yield collect_rev(repo, \"workspace\", **kwargs)\n    if last_rev != baseline_rev:\n        yield from collect_branch(repo, last_rev, baseline_rev, **kwargs)\n\n\ndef collect_queued(\n    repo: \"Repo\",\n    baseline_revs: Collection[str],\n    
**kwargs,\n) -> dict[str, list[\"ExpRange\"]]:\n    \"\"\"Collect queued experiments derived from the specified revisions.\n\n    Args:\n        repo: Repo.\n        baseline_revs: Resolved baseline Git SHAs.\n\n    Returns:\n        Dict mapping baseline revision to list of queued experiments.\n    \"\"\"\n    if not baseline_revs:\n        return {}\n    queued_data = {}\n    for rev, ranges in repo.experiments.celery_queue.collect_queued_data(\n        baseline_revs, **kwargs\n    ).items():\n        for exp_range in ranges:\n            for exp_state in exp_range.revs:\n                if exp_state.data:\n                    attrs = [f.name for f in fields(SerializableExp)]\n                    exp_state.data = SerializableExp(\n                        **{\n                            attr: getattr(exp_state.data, attr)\n                            for attr in attrs\n                            if attr != \"metrics\"\n                        }\n                    )\n        queued_data[rev] = ranges\n    return queued_data\n\n\ndef collect_active(\n    repo: \"Repo\",\n    baseline_revs: Collection[str],\n    **kwargs,\n) -> dict[str, list[\"ExpRange\"]]:\n    \"\"\"Collect active (running) experiments derived from the specified revisions.\n\n    Args:\n        repo: Repo.\n        baseline_revs: Resolved baseline Git SHAs.\n\n    Returns:\n        Dict mapping baseline revision to list of active experiments.\n    \"\"\"\n    if not baseline_revs:\n        return {}\n    result: dict[str, list[ExpRange]] = {}\n    exps = repo.experiments\n    for queue in (exps.workspace_queue, exps.tempdir_queue, exps.celery_queue):\n        for baseline, active_exps in queue.collect_active_data(\n            baseline_revs, **kwargs\n        ).items():\n            if baseline in result:\n                result[baseline].extend(active_exps)\n            else:\n                result[baseline] = list(active_exps)\n    return result\n\n\ndef collect_failed(\n    repo: 
\"Repo\",\n    baseline_revs: Collection[str],\n    **kwargs,\n) -> dict[str, list[\"ExpRange\"]]:\n    \"\"\"Collect failed experiments derived from the specified revisions.\n\n    Args:\n        repo: Repo.\n        baseline_revs: Resolved baseline Git SHAs.\n\n    Returns:\n        Dict mapping baseline revision to list of active experiments.\n    \"\"\"\n    if not baseline_revs:\n        return {}\n    return repo.experiments.celery_queue.collect_failed_data(baseline_revs, **kwargs)\n\n\ndef collect_successful(\n    repo: \"Repo\",\n    baseline_revs: Collection[str],\n    **kwargs,\n) -> dict[str, list[\"ExpRange\"]]:\n    \"\"\"Collect successful experiments derived from the specified revisions.\n\n    Args:\n        repo: Repo.\n        baseline_revs: Resolved baseline Git SHAs.\n\n    Returns:\n        Dict mapping baseline revision to successful experiments.\n    \"\"\"\n    result: dict[str, list[ExpRange]] = {}\n    for baseline_rev in baseline_revs:\n        result[baseline_rev] = list(_collect_baseline(repo, baseline_rev, **kwargs))\n    return result\n\n\ndef _collect_baseline(\n    repo: \"Repo\",\n    baseline_rev: str,\n    **kwargs,\n) -> Iterator[\"ExpRange\"]:\n    \"\"\"Iterate over experiments derived from a baseline revision.\n\n    Args:\n        repo: Repo.\n        baseline_revs: Resolved baseline Git SHAs.\n\n    Yields:\n        Tuple of (timestamp, exp_range).\n    \"\"\"\n    ref_info = ExpRefInfo(baseline_sha=baseline_rev)\n    refs: Optional[Iterable[str]] = kwargs.get(\"refs\")\n    if refs:\n        ref_it = (ref for ref in iter(refs) if ref.startswith(str(ref_info)))\n    else:\n        ref_it = repo.scm.iter_refs(base=str(ref_info))\n    executors = repo.experiments.celery_queue.collect_success_executors([baseline_rev])\n    for ref in ref_it:\n        try:\n            ref_info = ExpRefInfo.from_ref(ref)\n            exp_rev = repo.scm.get_ref(ref)\n            if not exp_rev:\n                continue\n        except 
(InvalidExpRefError, SCMError, InnerSCMError):\n            continue\n        exps = list(collect_branch(repo, exp_rev, baseline_rev, **kwargs))\n        if exps:\n            exps[0].name = ref_info.name\n            yield ExpRange(\n                exps,\n                name=ref_info.name,\n                executor=executors.get(str(ref_info)),\n            )\n\n\ndef collect(\n    repo: \"Repo\",\n    revs: Union[list[str], str, None] = None,\n    all_branches: bool = False,\n    all_tags: bool = False,\n    all_commits: bool = False,\n    num: int = 1,\n    hide_queued: bool = False,\n    hide_failed: bool = False,\n    hide_workspace: bool = False,\n    sha_only: bool = False,\n    **kwargs,\n) -> list[\"ExpState\"]:\n    \"\"\"Collect baseline revisions and derived experiments.\"\"\"\n    assert isinstance(repo.scm, Git)\n    if repo.scm.no_commits:\n        return []\n    if not any([revs, all_branches, all_tags, all_commits]):\n        revs = [\"HEAD\"]\n    if isinstance(revs, str):\n        revs = [revs]\n    cached_refs = list(repo.scm.iter_refs())\n    baseline_revs = list(\n        iter_revs(\n            repo.scm,\n            revs=revs,\n            num=num,\n            all_branches=all_branches,\n            all_tags=all_tags,\n            all_commits=all_commits,\n        )\n    )\n    if sha_only:\n        baseline_names: dict[str, Optional[str]] = {}\n    else:\n        baseline_names = describe(\n            repo.scm, baseline_revs, refs=cached_refs, logger=logger\n        )\n\n    result: list[ExpState] = []\n    if not hide_workspace:\n        workspace_data = collect_rev(repo, \"workspace\", **kwargs)\n        result.append(workspace_data)\n\n    queued = collect_queued(repo, baseline_revs, **kwargs) if not hide_queued else {}\n    active = collect_active(repo, baseline_revs, **kwargs)\n    failed = collect_failed(repo, baseline_revs, **kwargs) if not hide_failed else {}\n    successful = collect_successful(repo, baseline_revs, 
**kwargs)\n\n    for baseline_rev in baseline_revs:\n        baseline_data = collect_rev(repo, baseline_rev)\n        experiments = list(\n            itertools.chain.from_iterable(\n                _sorted_ranges(collected.get(baseline_rev, []))\n                for collected in (active, successful, queued, failed)\n            )\n        )\n        result.append(\n            ExpState(\n                rev=baseline_rev,\n                name=baseline_names.get(baseline_rev),\n                data=baseline_data.data,\n                error=baseline_data.error,\n                experiments=experiments if experiments else None,\n            )\n        )\n    return result\n\n\ndef _sorted_ranges(exp_ranges: Iterable[\"ExpRange\"]) -> list[\"ExpRange\"]:\n    \"\"\"Return list of ExpRange sorted by (timestamp, rev).\"\"\"\n\n    def _head_timestamp(exp_range: \"ExpRange\") -> tuple[datetime, str]:\n        head_exp = first(exp_range.revs)\n        if head_exp and head_exp.data and head_exp.data.timestamp:\n            return head_exp.data.timestamp, head_exp.rev\n\n        return datetime.fromtimestamp(0), \"\"  # noqa: DTZ006\n\n    return sorted(exp_ranges, key=_head_timestamp, reverse=True)\n"
  },
  {
    "path": "dvc/repo/experiments/diff.py",
    "content": "from dvc.log import logger\nfrom dvc.utils.diff import diff as _diff\nfrom dvc.utils.diff import format_dict\n\nlogger = logger.getChild(__name__)\n\n\ndef diff(repo, *args, a_rev=None, b_rev=None, param_deps=False, **kwargs):\n    from dvc.repo.experiments.collect import collect_rev\n    from dvc.scm import resolve_rev\n\n    if repo.scm.no_commits:\n        return {}\n\n    if a_rev:\n        rev = resolve_rev(repo.scm, a_rev)\n    else:\n        rev = resolve_rev(repo.scm, \"HEAD\")\n    old = collect_rev(repo, rev, param_deps=param_deps)\n\n    if b_rev:\n        rev = resolve_rev(repo.scm, b_rev)\n    else:\n        rev = \"workspace\"\n    new = collect_rev(repo, rev, param_deps=param_deps)\n\n    with_unchanged = kwargs.pop(\"all\", False)\n    return {\n        key: _diff(\n            format_dict(getattr(old.data, key, {})),\n            format_dict(getattr(new.data, key, {})),\n            with_unchanged=with_unchanged,\n        )\n        for key in [\"metrics\", \"params\"]\n    }\n"
  },
  {
    "path": "dvc/repo/experiments/exceptions.py",
    "content": "from collections.abc import Collection, Iterable\nfrom typing import TYPE_CHECKING, Optional\n\nfrom dvc.exceptions import DvcException, InvalidArgumentError\n\nif TYPE_CHECKING:\n    from .refs import ExpRefInfo\n\n\nclass BaselineMismatchError(DvcException):\n    def __init__(self, rev, expected):\n        if hasattr(rev, \"hexsha\"):\n            rev = rev.hexsha\n        rev_str = f\"{rev[:7]}\" if rev is not None else \"invalid commit\"\n        super().__init__(\n            f\"Experiment derived from '{rev_str}', expected '{expected[:7]}'.\"\n        )\n        self.rev = rev\n        self.expected_rev = expected\n\n\nclass ExperimentExistsError(DvcException):\n    def __init__(self, name: str, command: str = \"run\"):\n        msg = (\n            \"Experiment conflicts with existing experiment \"\n            f\"'{name}'. To overwrite the existing experiment run:\\n\\n\"\n            f\"\\tdvc exp {command} -f ...\\n\\n\"\n        )\n        super().__init__(msg)\n        self.name = name\n\n\nclass InvalidExpRefError(DvcException):\n    def __init__(self, ref):\n        super().__init__(f\"'{ref}' is not a valid experiment refname.\")\n        self.ref = ref\n\n\nclass InvalidExpRevError(InvalidArgumentError):\n    def __init__(self, rev):\n        super().__init__(f\"'{rev}' does not appear to be an experiment commit.\")\n\n\nclass MultipleBranchError(DvcException):\n    def __init__(self, rev, ref_infos):\n        super().__init__(\n            f\"Ambiguous commit '{rev[:7]}' belongs to multiple experiment branches.\"\n        )\n        self.rev = rev\n        self.ref_infos = ref_infos\n\n\nclass AmbiguousExpRefInfo(InvalidArgumentError):  # noqa: N818\n    def __init__(self, exp_name: str, exp_ref_list: Iterable[\"ExpRefInfo\"]):\n        msg = [\n            (\n                f\"Ambiguous name '{exp_name}' refers to multiple experiments.\"\n                \" Use one of the following full refnames instead:\"\n            ),\n        
    \"\",\n        ]\n        msg.extend([f\"\\t{info}\" for info in exp_ref_list])\n        super().__init__(\"\\n\".join(msg))\n\n\nclass UnresolvedExpNamesError(InvalidArgumentError):\n    NAME = \"experiment name\"\n\n    def __init__(\n        self,\n        unresolved_list: Collection[str],\n        *args,\n        git_remote: Optional[str] = None,\n    ):\n        unresolved_names = \"; \".join(unresolved_list)\n        if not git_remote:\n            if len(unresolved_list) > 1:\n                super().__init__(f\"'{unresolved_names}' are not valid {self.NAME}s\")\n            else:\n                super().__init__(f\"'{unresolved_names}' is not a valid {self.NAME}\")\n        else:\n            super().__init__(\n                f\"Experiment '{unresolved_names}' does not exist in '{git_remote}'\"\n            )\n\n\nclass UnresolvedQueueExpNamesError(UnresolvedExpNamesError):\n    NAME = \"queued experiment name\"\n\n\nclass UnresolvedRunningExpNamesError(UnresolvedExpNamesError):\n    NAME = \"running experiment name\"\n\n\nclass ExpQueueEmptyError(DvcException):\n    pass\n\n\nclass ExpNotStartedError(DvcException):\n    def __init__(self, name: str):\n        super().__init__(\n            f\"Queued experiment '{name}' exists but has not started running yet\"\n        )\n"
  },
  {
    "path": "dvc/repo/experiments/executor/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/repo/experiments/executor/base.py",
    "content": "import os\nimport pickle\nimport shutil\nfrom abc import ABC, abstractmethod\nfrom collections.abc import Iterable, Iterator\nfrom contextlib import AbstractContextManager, contextmanager\nfrom dataclasses import asdict, dataclass\nfrom enum import IntEnum\nfrom itertools import chain\nfrom typing import TYPE_CHECKING, Any, Callable, NamedTuple, Optional, Union\n\nimport funcy\nfrom funcy import nullcontext\nfrom scmrepo.exceptions import SCMError\n\nfrom dvc.env import DVC_EXP_AUTO_PUSH, DVC_EXP_GIT_REMOTE\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.logger import set_loggers_level\nfrom dvc.repo.experiments.exceptions import ExperimentExistsError\nfrom dvc.repo.experiments.refs import EXEC_BASELINE, EXEC_BRANCH, ExpRefInfo\nfrom dvc.repo.experiments.utils import to_studio_params\nfrom dvc.repo.metrics.show import _collect_top_level_metrics\nfrom dvc.repo.params.show import _collect_top_level_params\nfrom dvc.stage.serialize import to_lockfile\nfrom dvc.utils import dict_sha256, env2bool, relpath\nfrom dvc.utils.fs import remove\nfrom dvc.utils.studio import (\n    env_to_config,\n    get_repo_url,\n    get_subrepo_relpath,\n)\n\nif TYPE_CHECKING:\n    from queue import Queue\n\n    from typing_extensions import Self\n\n    from dvc.repo import Repo\n    from dvc.repo.experiments.stash import ExpStashEntry\n    from dvc.scm import Git\n    from dvc.stage import PipelineStage, Stage\n\nlogger = logger.getChild(__name__)\n\n\nclass ExecutorResult(NamedTuple):\n    exp_hash: Optional[str]\n    ref_info: Optional[\"ExpRefInfo\"]\n    force: bool\n\n\nclass TaskStatus(IntEnum):\n    PENDING = 0\n    PREPARING = 1\n    RUNNING = 2\n    SUCCESS = 3\n    FAILED = 4\n    CANCELED = 5\n    FINISHED = 6\n\n\n@dataclass\nclass ExecutorInfo:\n    git_url: str\n    baseline_rev: str\n    location: str\n    root_dir: str\n    dvc_dir: str\n    name: Optional[str] = None\n    wdir: Optional[str] = None\n    result_hash: 
Optional[str] = None\n    result_ref: Optional[str] = None\n    result_force: bool = False\n    status: TaskStatus = TaskStatus.PENDING\n\n    @classmethod\n    def from_dict(cls, d):\n        if d.pop(\"collected\", None):\n            d[\"status\"] = TaskStatus.FINISHED\n        return cls(**d)\n\n    def asdict(self):\n        return asdict(self)\n\n    @property\n    def result(self) -> Optional[\"ExecutorResult\"]:\n        if self.result_hash is None:\n            return None\n        return ExecutorResult(\n            self.result_hash,\n            ExpRefInfo.from_ref(self.result_ref) if self.result_ref else None,\n            self.result_force,\n        )\n\n    def dump_json(self, filename: str):\n        from dvc.utils.serialize import modify_json\n\n        os.makedirs(os.path.dirname(filename), exist_ok=True)\n        with modify_json(filename) as d:\n            d.update(self.asdict())\n\n    @classmethod\n    def load_json(cls, filename: str) -> \"ExecutorInfo\":\n        from dvc.utils.serialize import load_json\n\n        return cls.from_dict(load_json(filename))\n\n\nclass BaseExecutor(ABC):\n    \"\"\"Base class for executing experiments in parallel.\n\n    Parameters:\n        root_dir: Path to SCM root.\n        dvc_dir: Path to .dvc dir relative to SCM root.\n        baseline_rev: Experiment baseline revision.\n        wdir: Path to exec working directory relative to SCM root.\n        name: Executor (experiment) name.\n        result: Completed executor result.\n    \"\"\"\n\n    PACKED_ARGS_FILE = \"repro.dat\"\n    WARN_UNTRACKED = False\n    INFOFILE_EXT = \".run\"\n    DEFAULT_LOCATION: str = \"workspace\"\n\n    def __init__(\n        self,\n        root_dir: str,\n        dvc_dir: str,\n        baseline_rev: str,\n        status: TaskStatus,\n        wdir: Optional[str] = None,\n        name: Optional[str] = None,\n        location: Optional[str] = None,\n        result: Optional[\"ExecutorResult\"] = None,\n        **kwargs,\n    ):\n  
      self.dvc_dir = dvc_dir\n        self.root_dir = root_dir\n        self.wdir = wdir\n        self.name = name\n        self.baseline_rev = baseline_rev\n        self.location: str = location or self.DEFAULT_LOCATION\n        self.result = result\n        self.status = status\n\n    @abstractmethod\n    def init_git(\n        self,\n        repo: \"Repo\",\n        scm: \"Git\",\n        stash_rev: str,\n        entry: \"ExpStashEntry\",\n        infofile: Optional[str],\n        branch: Optional[str] = None,\n    ):\n        \"\"\"Init git repo and populate it using exp refs from the specified\n        SCM instance.\n        \"\"\"\n\n    @property\n    @abstractmethod\n    def git_url(self) -> str:\n        pass\n\n    @abstractmethod\n    def init_cache(self, repo: \"Repo\", rev: str, run_cache: bool = True):\n        \"\"\"Initialize DVC cache.\"\"\"\n\n    @abstractmethod\n    def collect_cache(\n        self, repo: \"Repo\", exp_ref: \"ExpRefInfo\", run_cache: bool = True\n    ):\n        \"\"\"Collect DVC cache.\"\"\"\n\n    @property\n    def info(self) -> \"ExecutorInfo\":\n        if self.result is not None:\n            result_dict: dict[str, Any] = {\n                \"result_hash\": self.result.exp_hash,\n                \"result_ref\": (\n                    str(self.result.ref_info) if self.result.ref_info else None\n                ),\n                \"result_force\": self.result.force,\n            }\n        else:\n            result_dict = {}\n        return ExecutorInfo(\n            git_url=self.git_url,\n            baseline_rev=self.baseline_rev,\n            location=self.location,\n            root_dir=self.root_dir,\n            dvc_dir=self.dvc_dir,\n            name=self.name,\n            wdir=self.wdir,\n            status=self.status,\n            **result_dict,\n        )\n\n    @classmethod\n    def from_info(cls, info: \"ExecutorInfo\") -> \"Self\":\n        if info.result_hash:\n            result: Optional[ExecutorResult] = 
ExecutorResult(\n                info.result_hash,\n                (ExpRefInfo.from_ref(info.result_ref) if info.result_ref else None),\n                info.result_force,\n            )\n        else:\n            result = None\n        return cls(\n            root_dir=info.root_dir,\n            dvc_dir=info.dvc_dir,\n            baseline_rev=info.baseline_rev,\n            status=info.status,\n            name=info.name,\n            wdir=info.wdir,\n            result=result,\n        )\n\n    @classmethod\n    @abstractmethod\n    def from_stash_entry(\n        cls,\n        repo: \"Repo\",\n        entry: \"ExpStashEntry\",\n        **kwargs,\n    ) -> \"Self\":\n        pass\n\n    @classmethod\n    def _from_stash_entry(\n        cls,\n        repo: \"Repo\",\n        entry: \"ExpStashEntry\",\n        root_dir: str,\n        **kwargs,\n    ) -> \"Self\":\n        return cls(\n            root_dir=root_dir,\n            dvc_dir=relpath(repo.dvc_dir, repo.scm.root_dir),\n            baseline_rev=entry.baseline_rev,\n            status=TaskStatus.PREPARING,\n            name=entry.name,\n            wdir=relpath(os.getcwd(), repo.scm.root_dir),\n            **kwargs,\n        )\n\n    @classmethod\n    def _get_top_level_paths(cls, repo: \"Repo\") -> list[\"str\"]:\n        return list(\n            chain(\n                _collect_top_level_metrics(repo),\n                _collect_top_level_params(repo),\n                repo.index._plot_sources,\n            )\n        )\n\n    @classmethod\n    def save(\n        cls,\n        info: \"ExecutorInfo\",\n        targets: Optional[Iterable[str]] = None,\n        recursive: bool = False,\n        force: bool = False,\n        include_untracked: Optional[list[str]] = None,\n        message: Optional[str] = None,\n    ) -> ExecutorResult:\n        from dvc.dvcfile import LOCK_FILE\n        from dvc.repo import Repo\n\n        exp_hash: Optional[str] = None\n        exp_ref: Optional[ExpRefInfo] = None\n\n       
 dvc = Repo(os.path.join(info.root_dir, info.dvc_dir))\n        old_cwd = os.getcwd()\n        if info.wdir:\n            os.chdir(os.path.join(dvc.scm.root_dir, info.wdir))\n        else:\n            os.chdir(dvc.root_dir)\n\n        include_untracked = include_untracked or []\n        include_untracked.extend(cls._get_top_level_paths(dvc))\n        # dvc repro automatically stages dvc.lock. Running redundant `git add`\n        # on it causes an error when exiting the detached head context.\n        if LOCK_FILE in dvc.scm.untracked_files():\n            include_untracked.append(LOCK_FILE)\n\n        try:\n            stages = []\n            if targets:\n                for target in targets:\n                    stages.append(  # noqa: PERF401\n                        dvc.commit(\n                            target, recursive=recursive, force=True, relink=False\n                        )\n                    )\n            else:\n                stages = dvc.commit([], recursive=recursive, force=True, relink=False)\n            exp_hash = cls.hash_exp(stages)\n            if include_untracked:\n                from dvc.scm import add_no_submodules\n\n                add_no_submodules(dvc.scm, include_untracked, force=True)  # type: ignore[call-arg]\n\n            with cls.auto_push(dvc):\n                cls.commit(\n                    dvc.scm,  # type: ignore[arg-type]\n                    exp_hash,\n                    exp_name=info.name,\n                    force=force,\n                    message=message,\n                )\n\n            ref: Optional[str] = dvc.scm.get_ref(EXEC_BRANCH, follow=False)\n            exp_ref = ExpRefInfo.from_ref(ref) if ref else None\n            untracked = dvc.scm.untracked_files()\n            if untracked:\n                logger.warning(\n                    \"The following untracked files were present in \"\n                    \"the workspace before saving but \"\n                    \"will not be included in the 
experiment commit:\\n\"\n                    \"\\t%s\",\n                    \", \".join(untracked),\n                )\n            info.result_hash = exp_hash\n            info.result_ref = ref\n            info.result_force = False\n            info.status = TaskStatus.SUCCESS\n        except DvcException:\n            info.status = TaskStatus.FAILED\n            raise\n        finally:\n            dvc.close()\n            os.chdir(old_cwd)\n\n        return ExecutorResult(ref, exp_ref, info.result_force)\n\n    @staticmethod\n    def hash_exp(stages: Iterable[\"PipelineStage\"]) -> str:\n        from dvc.stage import PipelineStage\n\n        exp_data = {}\n        for stage in stages:\n            if isinstance(stage, PipelineStage):\n                exp_data.update(to_lockfile(stage))\n        return dict_sha256(exp_data)\n\n    def cleanup(self, infofile: Optional[str] = None):\n        if infofile is not None:\n            info = ExecutorInfo.load_json(infofile)\n            if info.status < TaskStatus.FAILED:\n                info.status = TaskStatus.FINISHED\n            info.dump_json(infofile)\n\n    # TODO: come up with better way to stash repro arguments\n    @staticmethod\n    def pack_repro_args(path, *args, fs=None, extra=None, **kwargs):\n        dpath = os.path.dirname(path)\n        if fs:\n            open_func = fs.open\n            fs.makedirs(dpath)\n        else:\n            open_func = open\n            os.makedirs(dpath, exist_ok=True)\n\n        data = {\"args\": args, \"kwargs\": kwargs}\n        if extra is not None:\n            data[\"extra\"] = extra\n        with open_func(path, \"wb\") as fobj:\n            pickle.dump(data, fobj)\n\n    @staticmethod\n    def unpack_repro_args(path):\n        with open(path, \"rb\") as fobj:\n            data = pickle.load(fobj)  # noqa: S301\n        return data[\"args\"], data[\"kwargs\"]\n\n    def fetch_exps(\n        self,\n        dest_scm: \"Git\",\n        refs: list[str],\n        
force: bool = False,\n        on_diverged: Optional[Callable[[str], None]] = None,\n        **kwargs,\n    ) -> Iterable[str]:\n        \"\"\"Fetch reproduced experiment refs into the specified SCM.\n\n        Args:\n            dest_scm: Destination Git instance.\n            refs: reference names to be fetched from the remotes.\n            force: If True, diverged refs will be overwritten\n            on_diverged: Callback in the form on_diverged(ref)\n                to be called when an experiment ref has diverged.\n\n        Extra kwargs will be passed into the remote git client.\n        \"\"\"\n\n        def on_diverged_ref(orig_ref: str, new_rev: str):\n            if force:\n                logger.debug(\"Replacing existing experiment '%s'\", orig_ref)\n                return True\n\n            if on_diverged:\n                return on_diverged(orig_ref)\n\n            self._raise_ref_conflict(dest_scm, orig_ref, new_rev)\n            logger.debug(\"Reproduced existing experiment '%s'\", orig_ref)\n            return False\n\n        # fetch experiments\n        try:\n            refspecs = [f\"{ref}:{ref}\" for ref in refs]\n            dest_scm.fetch_refspecs(\n                self.git_url,\n                refspecs,\n                on_diverged=on_diverged_ref,\n                force=force,\n                **kwargs,\n            )\n        except SCMError:\n            pass\n\n        return refs\n\n    @classmethod\n    def _validate_remotes(cls, dvc: \"Repo\", git_remote: Optional[str]):\n        from scmrepo.exceptions import InvalidRemote\n\n        from dvc.scm import InvalidRemoteSCMRepo\n\n        if git_remote == dvc.root_dir:\n            logger.warning(\n                (\n                    \"'%s' points to the current Git repo, experiment \"\n                    \"Git refs will not be pushed. 
But DVC cache and run cache \"\n                    \"will automatically be pushed to the default DVC remote \"\n                    \"(if any) on each experiment commit.\"\n                ),\n                git_remote,\n            )\n        try:\n            dvc.scm.validate_git_remote(git_remote)\n        except InvalidRemote as exc:\n            raise InvalidRemoteSCMRepo(str(exc))  # noqa: B904\n        dvc.cloud.get_remote_odb()\n\n    @classmethod\n    def reproduce(\n        cls,\n        info: \"ExecutorInfo\",\n        rev: str,\n        queue: Optional[\"Queue\"] = None,\n        infofile: Optional[str] = None,\n        log_errors: bool = True,\n        log_level: Optional[int] = None,\n        copy_paths: Optional[list[str]] = None,\n        message: Optional[str] = None,\n        **kwargs,\n    ) -> \"ExecutorResult\":\n        \"\"\"Run dvc repro and return the result.\n\n        Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the\n            experiment hash (or None on error), exp_ref is the experiment ref,\n            and force is a bool specifying whether or not this experiment\n            should force overwrite any existing duplicates.\n        \"\"\"\n        from dvc.repo.checkout import checkout as dvc_checkout\n        from dvc.ui import ui\n\n        if queue is not None:\n            queue.put((rev, os.getpid()))\n\n        log_ctx = cls._set_log_level(log_level) if log_errors else nullcontext()\n\n        exp_hash: Optional[str] = None\n        exp_ref: Optional[ExpRefInfo] = None\n        repro_force: bool = False\n\n        if info.name:\n            ui.write(f\"Reproducing experiment '{info.name}'\")\n\n        with (\n            log_ctx,\n            cls._repro_dvc(\n                info,\n                infofile,\n                log_errors=log_errors,\n                copy_paths=copy_paths,\n                message=message,\n                **kwargs,\n            ) as dvc,\n        ):\n            args, kwargs = 
cls._repro_args(dvc)\n            if args:\n                targets: Optional[Union[list, str]] = args[0]\n            else:\n                targets = kwargs.get(\"targets\")\n\n            repro_force = kwargs.get(\"force\", False)\n            logger.trace(\"Executor repro with force = '%s'\", str(repro_force))\n\n            repro_dry = kwargs.get(\"dry\")\n\n            if not repro_dry:\n                dvc_checkout(\n                    dvc,\n                    targets=targets,\n                    with_deps=targets is not None,\n                    force=True,\n                    allow_missing=True,\n                    recursive=kwargs.get(\"recursive\", False),\n                )\n\n            kwargs[\"repro_fn\"] = cls._repro_and_track\n            stages = dvc.reproduce(*args, **kwargs)\n            if paths := cls._get_top_level_paths(dvc):\n                logger.debug(\"Staging top-level files: %s\", paths)\n                from dvc.scm import add_no_submodules\n\n                add_no_submodules(dvc.scm, paths)\n\n            exp_hash = cls.hash_exp(stages)\n            if not repro_dry:\n                ref, exp_ref, repro_force = cls._repro_commit(\n                    dvc,\n                    info,\n                    exp_hash,\n                    repro_force,\n                    message=message,\n                )\n                info.result_hash = exp_hash\n                info.result_ref = ref\n                info.result_force = repro_force\n\n        # ideally we would return stages here like a normal repro() call, but\n        # stages is not currently picklable and cannot be returned across\n        # multiprocessing calls\n        return ExecutorResult(exp_hash, exp_ref, repro_force)\n\n    @staticmethod\n    def _repro_and_track(stage: \"Stage\", **kwargs) -> Optional[\"Stage\"]:\n        from dvc.repo.reproduce import _reproduce_stage\n        from dvc.stage.utils import _get_stage_files\n\n        ret = _reproduce_stage(stage, 
**kwargs)\n        if not kwargs.get(\"dry\") and (paths := _get_stage_files(stage)):\n            logger.debug(\"Staging stage-related files: %s\", paths)\n            stage.repo.scm_context.add(paths)\n        return ret\n\n    @classmethod\n    def _repro_commit(\n        cls,\n        dvc,\n        info,\n        exp_hash,\n        repro_force,\n        message: Optional[str] = None,\n    ) -> tuple[Optional[str], Optional[\"ExpRefInfo\"], bool]:\n        with cls.auto_push(dvc):\n            cls.commit(\n                dvc.scm,\n                exp_hash,\n                exp_name=info.name,\n                force=repro_force,\n                message=message,\n            )\n\n        ref: Optional[str] = dvc.scm.get_ref(EXEC_BRANCH, follow=False)\n        exp_ref: Optional[ExpRefInfo] = ExpRefInfo.from_ref(ref) if ref else None\n        if cls.WARN_UNTRACKED:\n            untracked = dvc.scm.untracked_files()\n            if untracked:\n                logger.warning(\n                    (\n                        \"The following untracked files were present in \"\n                        \"the experiment directory after reproduction but \"\n                        \"will not be included in experiment commits:\\n\"\n                        \"\\t%s\"\n                    ),\n                    \", \".join(untracked),\n                )\n        return ref, exp_ref, repro_force\n\n    @classmethod\n    @contextmanager\n    def _repro_dvc(\n        cls,\n        info: \"ExecutorInfo\",\n        infofile: Optional[str] = None,\n        log_errors: bool = True,\n        copy_paths: Optional[list[str]] = None,\n        message: Optional[str] = None,\n        **kwargs,\n    ) -> Iterator[\"Repo\"]:\n        from dvc.repo import Repo\n        from dvc_studio_client.post_live_metrics import post_live_metrics\n\n        with Repo(os.path.join(info.root_dir, info.dvc_dir)) as dvc:\n            info.status = TaskStatus.RUNNING\n            if infofile is not None:\n   
             info.dump_json(infofile)\n            dvc.scm_context.quiet = True\n            old_cwd = os.getcwd()\n\n            for path in copy_paths or []:\n                cls._copy_path(os.path.abspath(path), os.path.join(dvc.root_dir, path))\n\n            if info.wdir:\n                os.chdir(os.path.join(dvc.scm.root_dir, info.wdir))\n            else:\n                os.chdir(dvc.root_dir)\n\n            args_path = os.path.join(dvc.tmp_dir, cls.PACKED_ARGS_FILE)\n            if os.path.exists(args_path):\n                _, kwargs = cls.unpack_repro_args(args_path)\n            dvc_studio_config = dvc.config.get(\"studio\")\n            # set missing config options using saved config\n            # inferring repo url will fail if not set here\n            run_env_config = env_to_config(kwargs.get(\"run_env\", {}))\n            dvc_studio_config = run_env_config | dvc_studio_config\n            # override studio repo url if exp git remote set\n            repo_url = get_repo_url(dvc)\n            try:\n                post_live_metrics(\n                    \"start\",\n                    info.baseline_rev,\n                    info.name,  # type: ignore[arg-type]\n                    \"dvc\",\n                    params=to_studio_params(dvc.params.show()),\n                    dvc_studio_config=dvc_studio_config,\n                    message=message,\n                    subdir=get_subrepo_relpath(dvc),\n                    studio_repo_url=repo_url,\n                )\n                logger.debug(\"Running repro in '%s'\", os.getcwd())\n                yield dvc\n                info.status = TaskStatus.SUCCESS\n            except DvcException:\n                if log_errors:\n                    logger.exception(\"\")\n                info.status = TaskStatus.FAILED\n                raise\n            except Exception:\n                if log_errors:\n                    logger.exception(\"unexpected error\")\n                info.status = 
TaskStatus.FAILED\n                raise\n            finally:\n                from dvc.repo.metrics.show import _gather_metrics\n\n                post_live_metrics(\n                    \"done\",\n                    info.baseline_rev,\n                    info.name,  # type: ignore[arg-type]\n                    \"dvc\",\n                    experiment_rev=dvc.experiments.scm.get_ref(EXEC_BRANCH),\n                    metrics=_gather_metrics(dvc, on_error=\"return\"),\n                    dvc_studio_config=dvc_studio_config,\n                    studio_repo_url=repo_url,\n                )\n\n                if infofile is not None:\n                    info.dump_json(infofile)\n                os.chdir(old_cwd)\n\n    @classmethod\n    def _repro_args(cls, dvc):\n        args_path = os.path.join(dvc.tmp_dir, cls.PACKED_ARGS_FILE)\n        if os.path.exists(args_path):\n            args, kwargs = cls.unpack_repro_args(args_path)\n            remove(args_path)\n            # explicitly git rm/unstage the args file\n            dvc.scm.add([args_path], force=True)\n        else:\n            args = []\n            kwargs = {}\n        return args, kwargs\n\n    @classmethod\n    @contextmanager\n    def auto_push(cls, dvc: \"Repo\") -> Iterator[None]:\n        exp_config = dvc.config.get(\"exp\", {})\n        auto_push = env2bool(DVC_EXP_AUTO_PUSH, exp_config.get(\"auto_push\", False))\n        if not auto_push:\n            yield\n            return\n\n        git_remote = os.getenv(\n            DVC_EXP_GIT_REMOTE, exp_config.get(\"git_remote\", \"origin\")\n        )\n        try:\n            cls._validate_remotes(dvc, git_remote)\n        except DvcException as exc:\n            logger.warning(\"Failed to validate remotes. 
Disabling auto push: %s\", exc)\n\n            yield\n            return\n        yield\n        cls._auto_push(dvc, git_remote)\n\n    @staticmethod\n    def _auto_push(\n        dvc: \"Repo\",\n        git_remote: Optional[str],\n        push_cache=True,\n        run_cache=True,\n    ):\n        from dvc.ui import ui\n        from dvc.utils import format_link\n\n        branch = dvc.scm.get_ref(EXEC_BRANCH, follow=False)\n        link = format_link(\n            \"https://dvc.org/doc/user-guide/experiment-management/sharing-experiments\"\n        )\n        ui.write(\n            f\"Pushing experiment to '{git_remote}'. Cancel with CTRL+C. \"\n            f\"See {link} for more info.\"\n        )\n        try:\n            dvc.experiments.push(\n                git_remote,\n                branch,\n                push_cache=push_cache,\n                run_cache=run_cache,\n            )\n        except DvcException as exc:\n            logger.warning(\n                (\n                    \"Something went wrong while auto pushing experiment \"\n                    \"to the remote '%s': %s\"\n                ),\n                git_remote,\n                exc,\n            )\n\n    @classmethod\n    def commit(\n        cls,\n        scm: \"Git\",\n        exp_hash: str,\n        exp_name: Optional[str] = None,\n        force: bool = False,\n        message: Optional[str] = None,\n    ):\n        \"\"\"Commit stages as an experiment and return the commit SHA.\"\"\"\n\n        rev = scm.get_rev()\n        if not scm.is_dirty(untracked_files=False):\n            logger.debug(\"No changes to commit\")\n\n        check_conflict = False\n        branch = scm.get_ref(EXEC_BRANCH, follow=False)\n        if branch:\n            old_ref = rev\n            logger.debug(\"Commit to current experiment branch '%s'\", branch)\n        else:\n            baseline_rev = scm.get_ref(EXEC_BASELINE)\n            name = exp_name if exp_name else f\"exp-{exp_hash[:5]}\"\n         
   ref_info = ExpRefInfo(baseline_rev, name)\n            branch = str(ref_info)\n            old_ref = None\n            if scm.get_ref(branch):\n                if not force:\n                    check_conflict = True\n                logger.debug(\n                    \"%s existing experiment branch '%s'\",\n                    \"Replace\" if force else \"Reuse\",\n                    branch,\n                )\n            else:\n                logger.debug(\"Commit to new experiment branch '%s'\", branch)\n\n        scm.add([], update=True)\n        message = message or f\"dvc: commit experiment {exp_hash}\"\n        scm.commit(message, no_verify=True)\n        new_rev = scm.get_rev()\n        if check_conflict:\n            new_rev = cls._raise_ref_conflict(scm, branch, new_rev)\n        else:\n            scm.set_ref(branch, new_rev, old_ref=old_ref)\n        scm.set_ref(EXEC_BRANCH, branch, symbolic=True)\n\n        return new_rev\n\n    @staticmethod\n    def _raise_ref_conflict(scm, ref, new_rev):\n        # If this commit is a duplicate of the existing commit at 'ref', return\n        # the existing commit. Otherwise, error out and require user to re-run\n        # with --force as needed\n        orig_rev = scm.get_ref(ref)\n        if scm.diff(orig_rev, new_rev):\n            raise ExperimentExistsError(ref)\n        return orig_rev\n\n    @staticmethod\n    def _set_log_level(level: Optional[int]) -> AbstractContextManager[None]:\n        if level is not None:\n            return set_loggers_level(level)\n        return nullcontext()\n\n    @staticmethod\n    def _copy_path(src, dst):\n        try:\n            if os.path.isfile(src):\n                shutil.copy(src, dst)\n            elif os.path.isdir(src):\n                shutil.copytree(src, dst)\n            else:\n                raise DvcException(\n                    f\"Unable to copy '{src}'. 
It is not a file or directory.\"\n                )\n        except OSError as exc:\n            raise DvcException(f\"Unable to copy '{src}' to '{dst}'.\") from exc\n\n    @contextmanager\n    def set_temp_refs(self, scm: \"Git\", temp_dict: dict[str, str]):\n        # Retry ref set, get, and remove operations to handle transient issues during\n        # concurrent Git access.\n        # Dulwich deletes parent directories of refs if they happen to be empty after\n        # removing a ref, which can interfere with `set_ref` in other processes.\n        # `remove_ref` may also fail with a `FileLocked` error when refs are packed,\n        # since multiple processes might attempt to write to the same file.\n        retry = funcy.retry(10, errors=Exception, timeout=0.1)\n        set_ref = retry(scm.set_ref)\n        get_ref = retry(scm.get_ref)\n        remove_ref = retry(scm.remove_ref)\n\n        try:\n            for ref, rev in temp_dict.items():\n                set_ref(ref, rev)\n            yield\n        finally:\n            for ref in temp_dict:\n                if get_ref(ref):\n                    remove_ref(ref)\n"
  },
  {
    "path": "dvc/repo/experiments/executor/local.py",
    "content": "import os\nfrom contextlib import ExitStack\nfrom tempfile import mkdtemp\nfrom typing import TYPE_CHECKING, Optional, Union\n\nfrom configobj import ConfigObj\nfrom funcy import retry\nfrom shortuuid import uuid\n\nfrom dvc.lock import LockError\nfrom dvc.log import logger\nfrom dvc.repo.experiments.refs import (\n    EXEC_BASELINE,\n    EXEC_BRANCH,\n    EXEC_HEAD,\n    EXEC_MERGE,\n    EXEC_NAMESPACE,\n    TEMP_NAMESPACE,\n)\nfrom dvc.repo.experiments.utils import EXEC_TMP_DIR, get_exp_rwlock\nfrom dvc.scm import SCM, Git\nfrom dvc.utils.fs import remove\nfrom dvc.utils.objects import cached_property\n\nfrom .base import BaseExecutor, TaskStatus\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.repo.experiments.refs import ExpRefInfo\n    from dvc.repo.experiments.stash import ExpStashEntry\n    from dvc.scm import NoSCM\n\nlogger = logger.getChild(__name__)\n\n\nclass BaseLocalExecutor(BaseExecutor):\n    \"\"\"Base local machine executor.\"\"\"\n\n    @property\n    def git_url(self) -> str:\n        root_dir = os.path.abspath(self.root_dir)\n        if os.name == \"nt\":\n            root_dir = root_dir.replace(os.sep, \"/\")\n        return f\"file://{root_dir}\"\n\n    @cached_property\n    def scm(self) -> Union[\"Git\", \"NoSCM\"]:\n        return SCM(self.root_dir)\n\n    def cleanup(self, infofile: Optional[str] = None):\n        self.scm.close()\n        del self.scm\n        super().cleanup(infofile)\n\n    def collect_cache(\n        self, repo: \"Repo\", exp_ref: \"ExpRefInfo\", run_cache: bool = True\n    ):\n        \"\"\"Collect DVC cache.\"\"\"\n\n\nclass TempDirExecutor(BaseLocalExecutor):\n    \"\"\"Temp directory experiment executor.\"\"\"\n\n    # Temp dir executors should warn if untracked files exist (to help with\n    # debugging user code), and suppress other DVC hints (like `git add`\n    # suggestions) that are not applicable outside of workspace runs\n    WARN_UNTRACKED = True\n    DEFAULT_LOCATION = 
\"tempdir\"\n\n    @retry(180, errors=LockError, timeout=1)\n    def init_git(\n        self,\n        repo: \"Repo\",\n        scm: \"Git\",\n        stash_rev: str,\n        entry: \"ExpStashEntry\",\n        infofile: Optional[str],\n        branch: Optional[str] = None,\n    ):\n        from dulwich.repo import Repo as DulwichRepo\n\n        from dvc.repo.experiments.utils import push_refspec\n\n        DulwichRepo.init(os.fspath(self.root_dir))\n\n        self.status = TaskStatus.PREPARING\n        if infofile:\n            self.info.dump_json(infofile)\n\n        temp_head = f\"{TEMP_NAMESPACE}/head-{uuid()}\"\n        temp_merge = f\"{TEMP_NAMESPACE}/merge-{uuid()}\"\n        temp_baseline = f\"{TEMP_NAMESPACE}/baseline-{uuid()}\"\n\n        temp_ref_dict = {\n            temp_head: entry.head_rev,\n            temp_merge: stash_rev,\n            temp_baseline: entry.baseline_rev,\n        }\n        with (\n            get_exp_rwlock(repo, writes=[temp_head, temp_merge, temp_baseline]),\n            self.set_temp_refs(scm, temp_ref_dict),\n        ):\n            # Executor will be initialized with an empty git repo that\n            # we populate by pushing:\n            #   EXEC_HEAD - the base commit for this experiment\n            #   EXEC_MERGE - the unmerged changes (from our stash)\n            #       to be reproduced\n            #   EXEC_BASELINE - the baseline commit for this experiment\n            refspec = [\n                (temp_head, EXEC_HEAD),\n                (temp_merge, EXEC_MERGE),\n                (temp_baseline, EXEC_BASELINE),\n            ]\n\n            if branch:\n                refspec.append((branch, branch))\n                with get_exp_rwlock(repo, reads=[branch]):\n                    push_refspec(scm, self.git_url, refspec)\n                self.scm.set_ref(EXEC_BRANCH, branch, symbolic=True)\n            else:\n                push_refspec(scm, self.git_url, refspec)\n                if 
self.scm.get_ref(EXEC_BRANCH):\n                    self.scm.remove_ref(EXEC_BRANCH)\n\n        # checkout EXEC_HEAD and apply EXEC_MERGE on top of it without\n        # committing\n        assert isinstance(self.scm, Git)\n        head = EXEC_BRANCH if branch else EXEC_HEAD\n        self.scm.checkout(head, detach=True)\n        merge_rev = self.scm.get_ref(EXEC_MERGE)\n\n        self.scm.stash.apply(merge_rev)\n        self._update_config(repo.config.read(\"local\"))\n        local_git_config = os.path.join(repo.scm.root_dir, \".git\", \"config\")\n        self._update_git_config(ConfigObj(local_git_config, list_values=False))\n\n    def _update_config(self, update):\n        local_config = os.path.join(self.root_dir, self.dvc_dir, \"config.local\")\n        logger.debug(\"Writing experiments local config '%s'\", local_config)\n        if os.path.exists(local_config):\n            conf_obj = ConfigObj(local_config)\n            conf_obj.merge(update)\n        else:\n            conf_obj = ConfigObj(update)\n        if conf_obj:\n            with open(local_config, \"wb\") as fobj:\n                conf_obj.write(fobj)\n\n    def _update_git_config(self, update):\n        local_config = os.path.join(self.scm.root_dir, \".git\", \"config\")\n        logger.debug(\"Writing experiments local Git config '%s'\", local_config)\n        if os.path.exists(local_config):\n            conf_obj = ConfigObj(local_config, list_values=False)\n            conf_obj.merge(update)\n        else:\n            conf_obj = ConfigObj(update, list_values=False)\n        if conf_obj:\n            with open(local_config, \"wb\") as fobj:\n                conf_obj.write(fobj)\n\n    def init_cache(\n        self,\n        repo: \"Repo\",\n        rev: str,  # noqa: ARG002\n        run_cache: bool = True,  # noqa: ARG002\n    ):\n        \"\"\"Initialize DVC cache.\"\"\"\n        self._update_config({\"cache\": {\"dir\": repo.cache.local_cache_dir}})\n\n    def cleanup(self, infofile: 
Optional[str] = None):\n        super().cleanup(infofile)\n        logger.debug(\"Removing tmpdir '%s'\", self.root_dir)\n        remove(self.root_dir)\n\n    @classmethod\n    def from_stash_entry(\n        cls,\n        repo: \"Repo\",\n        entry: \"ExpStashEntry\",\n        wdir: Optional[str] = None,\n        **kwargs,\n    ):\n        assert repo.tmp_dir\n        parent_dir: str = wdir or os.path.join(repo.tmp_dir, EXEC_TMP_DIR)\n        os.makedirs(parent_dir, exist_ok=True)\n        tmp_dir = mkdtemp(dir=parent_dir)\n        try:\n            executor = cls._from_stash_entry(repo, entry, tmp_dir, **kwargs)\n            logger.debug(\"Init temp dir executor in '%s'\", tmp_dir)\n            return executor\n        except Exception:\n            remove(tmp_dir)\n            raise\n\n\nclass WorkspaceExecutor(BaseLocalExecutor):\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self._detach_stack = ExitStack()\n\n    @classmethod\n    def from_stash_entry(cls, repo: \"Repo\", entry: \"ExpStashEntry\", **kwargs):\n        root_dir = repo.scm.root_dir\n        executor: WorkspaceExecutor = cls._from_stash_entry(\n            repo, entry, root_dir, **kwargs\n        )\n        logger.debug(\"Init workspace executor in '%s'\", root_dir)\n        return executor\n\n    @retry(180, errors=LockError, timeout=1)\n    def init_git(\n        self,\n        repo: \"Repo\",\n        scm: \"Git\",\n        stash_rev: str,\n        entry: \"ExpStashEntry\",\n        infofile: Optional[str],\n        branch: Optional[str] = None,\n    ):\n        self.status = TaskStatus.PREPARING\n        if infofile:\n            self.info.dump_json(infofile)\n\n        assert isinstance(self.scm, Git)\n\n        with get_exp_rwlock(repo, writes=[EXEC_NAMESPACE]):\n            scm.set_ref(EXEC_HEAD, entry.head_rev)\n            scm.set_ref(EXEC_MERGE, stash_rev)\n            scm.set_ref(EXEC_BASELINE, entry.baseline_rev)\n            
self._detach_stack.enter_context(\n                self.scm.detach_head(\n                    self.scm.get_ref(EXEC_HEAD),\n                    force=True,\n                    client=\"dvc\",\n                )\n            )\n            merge_rev = self.scm.get_ref(EXEC_MERGE)\n            self.scm.stash.apply(merge_rev)\n            if branch:\n                self.scm.set_ref(EXEC_BRANCH, branch, symbolic=True)\n            elif scm.get_ref(EXEC_BRANCH):\n                self.scm.remove_ref(EXEC_BRANCH)\n\n    def init_cache(self, repo: \"Repo\", rev: str, run_cache: bool = True):\n        pass\n\n    def cleanup(self, infofile: Optional[str] = None):\n        super().cleanup(infofile)\n        if infofile:\n            remove(os.path.dirname(infofile))\n        with self._detach_stack:\n            self.scm.remove_ref(EXEC_BASELINE)\n            self.scm.remove_ref(EXEC_MERGE)\n            if self.scm.get_ref(EXEC_BRANCH):\n                self.scm.remove_ref(EXEC_BRANCH)\n"
  },
  {
    "path": "dvc/repo/experiments/ls.py",
    "content": "from collections import defaultdict\nfrom typing import Optional, Union\n\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.scm import iter_revs\n\nfrom .utils import exp_refs_by_baseline\n\nlogger = logger.getChild(__name__)\n\n\n@locked\n@scm_context\ndef ls(\n    repo,\n    rev: Optional[Union[list[str], str]] = None,\n    all_commits: bool = False,\n    num: int = 1,\n    git_remote: Optional[str] = None,\n) -> dict[str, list[tuple[str, Optional[str]]]]:\n    \"\"\"List experiments.\n\n    Returns a dict mapping baseline revs to a list of (exp_name, exp_sha) tuples.\n    \"\"\"\n    rev_set = None\n    if not all_commits:\n        rev = rev or \"HEAD\"\n        if isinstance(rev, str):\n            rev = [rev]\n        revs = iter_revs(repo.scm, rev, num)\n        rev_set = set(revs.keys())\n\n    ref_info_dict = exp_refs_by_baseline(repo.scm, rev_set, git_remote)\n    results = defaultdict(list)\n    for baseline in ref_info_dict:\n        for info in ref_info_dict[baseline]:\n            if git_remote:\n                exp_rev = None\n            else:\n                exp_rev = repo.scm.get_ref(str(info))\n            results[baseline].append((info.name, exp_rev))\n\n    return results\n"
  },
  {
    "path": "dvc/repo/experiments/pull.py",
    "content": "from collections.abc import Iterable, Mapping\nfrom typing import Optional, Union\n\nfrom funcy import group_by\nfrom scmrepo.git.backend.base import SyncStatus\n\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.scm import TqdmGit, iter_revs\nfrom dvc.ui import ui\n\nfrom .exceptions import UnresolvedExpNamesError\nfrom .refs import ExpRefInfo\nfrom .utils import exp_commits, exp_refs, exp_refs_by_baseline, resolve_name\n\nlogger = logger.getChild(__name__)\n\n\n@locked\n@scm_context\ndef pull(  # noqa: C901\n    repo,\n    git_remote: str,\n    exp_names: Optional[Union[Iterable[str], str]] = None,\n    all_commits=False,\n    rev: Optional[Union[list[str], str]] = None,\n    num=1,\n    force: bool = False,\n    pull_cache: bool = False,\n    **kwargs,\n) -> Iterable[str]:\n    exp_ref_set: set[ExpRefInfo] = set()\n    if all_commits:\n        exp_ref_set.update(exp_refs(repo.scm, git_remote))\n    elif exp_names:\n        if isinstance(exp_names, str):\n            exp_names = [exp_names]\n        exp_ref_dict = resolve_name(repo.scm, exp_names, git_remote)\n\n        unresolved_exp_names = []\n        for exp_name, exp_ref in exp_ref_dict.items():\n            if exp_ref is None:\n                unresolved_exp_names.append(exp_name)\n            else:\n                exp_ref_set.add(exp_ref)\n\n        if unresolved_exp_names:\n            raise UnresolvedExpNamesError(unresolved_exp_names)\n\n    else:\n        rev = rev or \"HEAD\"\n        if isinstance(rev, str):\n            rev = [rev]\n        rev_dict = iter_revs(repo.scm, rev, num)\n        rev_set = set(rev_dict.keys())\n        ref_info_dict = exp_refs_by_baseline(repo.scm, rev_set, git_remote)\n        for ref_info_list in ref_info_dict.values():\n            exp_ref_set.update(ref_info_list)\n\n    pull_result = _pull(repo, git_remote, exp_ref_set, force)\n\n    if pull_result[SyncStatus.DIVERGED]:\n        
diverged_refs = [ref.name for ref in pull_result[SyncStatus.DIVERGED]]\n        ui.warn(\n            f\"Local experiment '{diverged_refs}' has diverged from remote \"\n            \"experiment with the same name. To override the local experiment \"\n            \"re-run with '--force'.\"\n        )\n\n    if pull_cache:\n        pull_cache_ref = (\n            pull_result[SyncStatus.UP_TO_DATE] + pull_result[SyncStatus.SUCCESS]\n        )\n        _pull_cache(repo, pull_cache_ref, **kwargs)\n\n    return [ref.name for ref in pull_result[SyncStatus.SUCCESS]]\n\n\ndef _pull(\n    repo,\n    git_remote: str,\n    refs: Iterable[\"ExpRefInfo\"],\n    force: bool,\n) -> Mapping[SyncStatus, list[\"ExpRefInfo\"]]:\n    refspec_list = [f\"{exp_ref}:{exp_ref}\" for exp_ref in refs]\n    logger.debug(\"git pull experiment '%s' -> '%s'\", git_remote, refspec_list)\n\n    with TqdmGit(desc=\"Fetching git refs\") as pbar:\n        results: Mapping[str, SyncStatus] = repo.scm.fetch_refspecs(\n            git_remote,\n            refspec_list,\n            force=force,\n            progress=pbar.update_git,\n        )\n\n    def group_result(refspec):\n        return results[str(refspec)]\n\n    pull_result: Mapping[SyncStatus, list[ExpRefInfo]] = group_by(group_result, refs)\n\n    return pull_result\n\n\ndef _pull_cache(\n    repo,\n    refs: Union[ExpRefInfo, Iterable[\"ExpRefInfo\"]],\n    dvc_remote=None,\n    jobs=None,\n    run_cache=False,\n):\n    if isinstance(refs, ExpRefInfo):\n        refs = [refs]\n    revs = list(exp_commits(repo.scm, refs))\n    logger.debug(\"dvc fetch experiment '%s'\", refs)\n    repo.fetch(\n        jobs=jobs, remote=dvc_remote, run_cache=run_cache, revs=revs, workspace=False\n    )\n"
  },
  {
    "path": "dvc/repo/experiments/push.py",
    "content": "from collections.abc import Iterable, Mapping\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nfrom funcy import compact, group_by\nfrom scmrepo.git.backend.base import SyncStatus\n\nfrom dvc.env import DVC_STUDIO_TOKEN, DVC_STUDIO_URL\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.scm import Git, TqdmGit, iter_revs\nfrom dvc.utils import env2bool\nfrom dvc.utils.collections import ensure_list\n\nfrom .exceptions import UnresolvedExpNamesError\nfrom .refs import ExpRefInfo\nfrom .utils import exp_commits, exp_refs, exp_refs_by_baseline, resolve_name\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\nlogger = logger.getChild(__name__)\n\n\nclass UploadError(DvcException):\n    def __init__(self, msg, result):\n        self.result = result\n        super().__init__(msg)\n\n\ndef notify_refs_to_studio(\n    repo: \"Repo\", git_remote: str, **refs: list[str]\n) -> Optional[str]:\n    import os\n\n    config = repo.config[\"studio\"]\n    refs = compact(refs)\n    if not refs or env2bool(\"DVC_TEST\"):\n        return None\n\n    token = (\n        os.environ.get(DVC_STUDIO_TOKEN)\n        or os.environ.get(\"STUDIO_TOKEN\")\n        or config.get(\"token\")\n    )\n    if not token:\n        logger.debug(\"Studio token not found.\")\n        return None\n\n    from dulwich.porcelain import get_remote_repo\n\n    from dvc.utils import studio\n\n    _, repo_url = get_remote_repo(repo.scm.dulwich.repo, git_remote)\n    studio_url = os.environ.get(DVC_STUDIO_URL) or config.get(\"url\")\n    d = studio.notify_refs(repo_url, token, base_url=studio_url, **refs)\n    return d.get(\"url\")\n\n\ndef exp_refs_from_names(scm: \"Git\", exp_names: list[str]) -> set[\"ExpRefInfo\"]:\n    exp_ref_set = set()\n    exp_ref_dict = resolve_name(scm, exp_names)\n    unresolved_exp_names = []\n    for exp_name, exp_ref in exp_ref_dict.items():\n        if 
exp_ref is None:\n            unresolved_exp_names.append(exp_name)\n        else:\n            exp_ref_set.add(exp_ref)\n\n    if unresolved_exp_names:\n        raise UnresolvedExpNamesError(unresolved_exp_names)\n    return exp_ref_set\n\n\ndef exp_refs_from_rev(scm: \"Git\", rev: list[str], num: int = 1) -> set[\"ExpRefInfo\"]:\n    exp_ref_set = set()\n    rev_dict = iter_revs(scm, rev, num)\n    rev_set = set(rev_dict.keys())\n    ref_info_dict = exp_refs_by_baseline(scm, rev_set)\n    for ref_info_list in ref_info_dict.values():\n        exp_ref_set.update(ref_info_list)\n    return exp_ref_set\n\n\n@locked\n@scm_context\ndef push(\n    repo: \"Repo\",\n    git_remote: str,\n    exp_names: Optional[Union[list[str], str]] = None,\n    all_commits: bool = False,\n    rev: Optional[Union[list[str], str]] = None,\n    num: int = 1,\n    force: bool = False,\n    push_cache: bool = False,\n    **kwargs: Any,\n) -> dict[str, Any]:\n    exp_ref_set: set[ExpRefInfo] = set()\n    assert isinstance(repo.scm, Git)\n    if all_commits:\n        exp_ref_set.update(exp_refs(repo.scm))\n    if exp_names:\n        exp_ref_set.update(exp_refs_from_names(repo.scm, ensure_list(exp_names)))\n    else:\n        rev = rev or \"HEAD\"\n        if isinstance(rev, str):\n            rev = [rev]\n        exp_ref_set.update(exp_refs_from_rev(repo.scm, rev, num=num))\n\n    push_result = _push(repo, git_remote, exp_ref_set, force)\n\n    refs = {\n        status.name.lower(): [ref.name for ref in ref_list]\n        for status, ref_list in push_result.items()\n    }\n    result: dict[str, Any] = {**refs, \"uploaded\": 0}\n\n    pushed_refs_info = (\n        push_result[SyncStatus.UP_TO_DATE] + push_result[SyncStatus.SUCCESS]\n    )\n\n    e = None\n    if push_cache:\n        try:\n            result[\"uploaded\"] = _push_cache(repo, pushed_refs_info, **kwargs)\n        except Exception as exc:  # noqa: BLE001\n            e = exc\n\n    pushed_refs = [str(r) for r in pushed_refs_info]\n 
   result[\"url\"] = notify_refs_to_studio(repo, git_remote, pushed=pushed_refs)\n\n    if e:\n        raise UploadError(\"failed to push cache\", result) from e\n    return result\n\n\ndef _push(\n    repo: \"Repo\",\n    git_remote: str,\n    refs: Iterable[\"ExpRefInfo\"],\n    force: bool,\n) -> Mapping[SyncStatus, list[\"ExpRefInfo\"]]:\n    from scmrepo.exceptions import AuthError\n\n    from dvc.scm import GitAuthError\n\n    refspec_list = [f\"{exp_ref}:{exp_ref}\" for exp_ref in refs]\n    logger.debug(\"git push experiment %s -> '%s'\", refspec_list, git_remote)\n\n    with TqdmGit(desc=\"Pushing git refs\") as pbar:\n        try:\n            results: Mapping[str, SyncStatus] = repo.scm.push_refspecs(\n                git_remote,\n                refspec_list,\n                force=force,\n                progress=pbar.update_git,\n            )\n        except AuthError as exc:\n            raise GitAuthError(str(exc))  # noqa: B904\n\n    def group_result(refspec):\n        return results[str(refspec)]\n\n    pull_result: Mapping[SyncStatus, list[ExpRefInfo]] = group_by(group_result, refs)\n\n    return pull_result\n\n\ndef _push_cache(\n    repo: \"Repo\",\n    refs: Union[ExpRefInfo, Iterable[\"ExpRefInfo\"]],\n    dvc_remote: Optional[str] = None,\n    jobs: Optional[int] = None,\n    run_cache: bool = False,\n) -> int:\n    if isinstance(refs, ExpRefInfo):\n        refs = [refs]\n    assert isinstance(repo.scm, Git)\n    revs = list(exp_commits(repo.scm, refs))\n    logger.debug(\"dvc push experiment '%s'\", refs)\n    return repo.push(\n        jobs=jobs, remote=dvc_remote, run_cache=run_cache, revs=revs, workspace=False\n    )\n"
  },
  {
    "path": "dvc/repo/experiments/queue/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/repo/experiments/queue/base.py",
    "content": "import os\nfrom abc import ABC, abstractmethod\nfrom collections.abc import Collection, Generator, Iterable, Mapping\nfrom dataclasses import asdict, dataclass\nfrom typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union\n\nfrom funcy import retry\n\nfrom dvc.dependency import ParamsDependency\nfrom dvc.env import DVC_EXP_BASELINE_REV, DVC_EXP_NAME, DVC_ROOT\nfrom dvc.lock import LockError\nfrom dvc.log import logger\nfrom dvc.repo.experiments.exceptions import ExperimentExistsError\nfrom dvc.repo.experiments.executor.base import BaseExecutor\nfrom dvc.repo.experiments.executor.local import WorkspaceExecutor\nfrom dvc.repo.experiments.refs import ExpRefInfo\nfrom dvc.repo.experiments.stash import ExpStash, ExpStashEntry\nfrom dvc.repo.experiments.utils import (\n    EXEC_PID_DIR,\n    EXEC_TMP_DIR,\n    get_exp_rwlock,\n    get_random_exp_name,\n)\nfrom dvc.utils.objects import cached_property\nfrom dvc.utils.studio import config_to_env\nfrom dvc_studio_client.post_live_metrics import get_studio_config\n\nfrom .utils import get_remote_executor_refs\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.repo.experiments import Experiments\n    from dvc.repo.experiments.executor.base import ExecutorResult\n    from dvc.repo.experiments.serialize import ExpRange\n    from dvc.scm import Git\n\nlogger = logger.getChild(__name__)\n\n\n@dataclass(frozen=True)\nclass QueueEntry:  # noqa: PLW1641\n    dvc_root: str\n    scm_root: str\n    stash_ref: str\n    stash_rev: str\n    baseline_rev: str\n    branch: Optional[str]\n    name: Optional[str]\n    head_rev: Optional[str] = None\n\n    def __eq__(self, other: object):\n        return (\n            isinstance(other, QueueEntry)\n            and self.dvc_root == other.dvc_root\n            and self.scm_root == other.scm_root\n            and self.stash_ref == other.stash_ref\n            and self.stash_rev == other.stash_rev\n        )\n\n    def asdict(self) -> dict[str, Any]:\n        
return asdict(self)\n\n    @classmethod\n    def from_dict(cls, d: dict[str, Any]) -> \"QueueEntry\":\n        return cls(**d)\n\n\nclass QueueGetResult(NamedTuple):\n    entry: QueueEntry\n    executor: BaseExecutor\n\n\nclass QueueDoneResult(NamedTuple):\n    entry: QueueEntry\n    result: Optional[\"ExecutorResult\"]\n\n\nclass ExpRefAndQueueEntry(NamedTuple):\n    exp_ref_info: Optional[\"ExpRefInfo\"]\n    queue_entry: Optional[\"QueueEntry\"]\n\n\nclass BaseStashQueue(ABC):\n    \"\"\"Naive Git-stash based experiment queue.\n\n    Maps queued experiments to (Git) stash reflog entries.\n    \"\"\"\n\n    def __init__(self, repo: \"Repo\", ref: str, failed_ref: Optional[str] = None):\n        \"\"\"Construct a queue.\n\n        Arguments:\n            scm: Git SCM instance for this queue.\n            ref: Git stash ref for this queue.\n            failed_ref: Failed run Git stash ref for this queue.\n        \"\"\"\n        self.repo = repo\n        assert self.repo.tmp_dir\n        self.ref = ref\n        self.failed_ref = failed_ref\n\n    @property\n    def scm(self) -> \"Git\":\n        from dvc.scm import Git\n\n        assert isinstance(self.repo.scm, Git)\n        return self.repo.scm\n\n    @cached_property\n    def stash(self) -> ExpStash:\n        return ExpStash(self.scm, self.ref)\n\n    @cached_property\n    def failed_stash(self) -> Optional[ExpStash]:\n        return ExpStash(self.scm, self.failed_ref) if self.failed_ref else None\n\n    @cached_property\n    def pid_dir(self) -> str:\n        assert self.repo.tmp_dir is not None\n        return os.path.join(self.repo.tmp_dir, EXEC_TMP_DIR, EXEC_PID_DIR)\n\n    @cached_property\n    def args_file(self) -> str:\n        assert self.repo.tmp_dir is not None\n        return os.path.join(self.repo.tmp_dir, BaseExecutor.PACKED_ARGS_FILE)\n\n    @abstractmethod\n    def put(self, *args, **kwargs) -> QueueEntry:\n        \"\"\"Stash an experiment and add it to the queue.\"\"\"\n\n    @abstractmethod\n  
  def get(self) -> QueueGetResult:\n        \"\"\"Pop and return the first item in the queue.\"\"\"\n\n    def remove(\n        self,\n        revs: Collection[str],\n        all_: bool = False,\n        queued: bool = False,\n        **kwargs,\n    ) -> list[str]:\n        \"\"\"Remove the specified entries from the queue.\n\n        Arguments:\n            revs: Stash revisions or queued exp names to be removed.\n            queued: Remove all queued tasks.\n            all: Remove all tasks.\n\n        Returns:\n            Revisions (or names) which were removed.\n        \"\"\"\n\n        if all_ or queued:\n            return self.clear()\n\n        name_to_remove: list[str] = []\n        entry_to_remove: list[ExpStashEntry] = []\n        queue_entries = self.match_queue_entry_by_name(revs, self.iter_queued())\n        for name, entry in queue_entries.items():\n            if entry:\n                entry_to_remove.append(self.stash.stash_revs[entry.stash_rev])\n                name_to_remove.append(name)\n\n        self.stash.remove_revs(entry_to_remove)\n        return name_to_remove\n\n    def clear(self, **kwargs) -> list[str]:\n        \"\"\"Remove all entries from the queue.\"\"\"\n        stash_revs = self.stash.stash_revs\n        name_to_remove = list(stash_revs)\n        self.stash.remove_revs(list(stash_revs.values()))\n\n        return name_to_remove\n\n    def status(self) -> list[dict[str, Any]]:\n        \"\"\"Show the status of exp tasks in queue\"\"\"\n        from datetime import datetime\n\n        result: list[dict[str, Optional[str]]] = []\n\n        def _get_timestamp(rev: str) -> datetime:\n            commit = self.scm.resolve_commit(rev)\n            return datetime.fromtimestamp(commit.commit_time)  # noqa: DTZ006\n\n        def _format_entry(\n            entry: QueueEntry,\n            exp_result: Optional[\"ExecutorResult\"] = None,\n            status: str = \"Unknown\",\n        ) -> dict[str, Any]:\n            name = 
entry.name\n            if not name and exp_result and exp_result.ref_info:\n                name = exp_result.ref_info.name\n            # NOTE: We fall back to Unknown status for experiments\n            # generated in prior (incompatible) DVC versions\n            return {\n                \"rev\": entry.stash_rev,\n                \"name\": name,\n                \"timestamp\": _get_timestamp(entry.stash_rev),\n                \"status\": status,\n            }\n\n        result.extend(\n            _format_entry(queue_entry, status=\"Running\")\n            for queue_entry in self.iter_active()\n        )\n        result.extend(\n            _format_entry(queue_entry, status=\"Queued\")\n            for queue_entry in self.iter_queued()\n        )\n        result.extend(\n            _format_entry(queue_entry, status=\"Failed\")\n            for queue_entry, _ in self.iter_failed()\n        )\n        result.extend(\n            _format_entry(queue_entry, exp_result=exp_result, status=\"Success\")\n            for queue_entry, exp_result in self.iter_success()\n        )\n        return result\n\n    @abstractmethod\n    def iter_queued(self) -> Generator[QueueEntry, None, None]:\n        \"\"\"Iterate over items in the queue.\"\"\"\n\n    @abstractmethod\n    def iter_active(self) -> Generator[QueueEntry, None, None]:\n        \"\"\"Iterate over items which are being actively processed.\"\"\"\n\n    @abstractmethod\n    def iter_done(self) -> Generator[QueueDoneResult, None, None]:\n        \"\"\"Iterate over items which have been processed.\"\"\"\n\n    @abstractmethod\n    def iter_success(self) -> Generator[QueueDoneResult, None, None]:\n        \"\"\"Iterate over items which have succeeded.\"\"\"\n\n    @abstractmethod\n    def iter_failed(self) -> Generator[QueueDoneResult, None, None]:\n        \"\"\"Iterate over items which have failed.\"\"\"\n\n    @abstractmethod\n    def reproduce(\n        self, copy_paths: Optional[list[str]] = None, message: 
Optional[str] = None\n    ) -> Mapping[str, Mapping[str, str]]:\n        \"\"\"Reproduce queued experiments sequentially.\"\"\"\n\n    @abstractmethod\n    def get_result(self, entry: QueueEntry) -> Optional[\"ExecutorResult\"]:\n        \"\"\"Return result of the specified item.\n\n        This method blocks until the specified item has been collected.\n        \"\"\"\n\n    @abstractmethod\n    def kill(self, revs: str) -> None:\n        \"\"\"Kill the specified running entries in the queue.\n\n        Arguments:\n            revs: Stash revs or running exp name to be killed.\n        \"\"\"\n\n    @abstractmethod\n    def shutdown(self, kill: bool = False):\n        \"\"\"Shut down the queue worker.\n\n        Arguments:\n            kill: If True, any active experiments will be killed and the\n                worker will shut down immediately. If False, the worker will\n                finish any active experiments before shutting down.\n        \"\"\"\n\n    @abstractmethod\n    def logs(self, rev: str, encoding: Optional[str] = None, follow: bool = False):\n        \"\"\"Print redirected output logs for an exp process.\n\n        Args:\n            rev: Stash rev or exp name.\n            encoding: Text encoding for redirected output. 
Defaults to\n                `locale.getpreferredencoding()`.\n            follow: Attach to running exp process and follow additional\n                output.\n        \"\"\"\n\n    def _stash_exp(\n        self,\n        *args,\n        params: Optional[dict[str, list[str]]] = None,\n        baseline_rev: Optional[str] = None,\n        branch: Optional[str] = None,\n        name: Optional[str] = None,\n        no_hydra: bool = False,\n        **kwargs,\n    ) -> QueueEntry:\n        \"\"\"Stash changes from the workspace as an experiment.\n\n        Args:\n            params: Dict mapping paths to `Hydra Override`_ patterns,\n                provided via `exp run --set-param`.\n            baseline_rev: Optional baseline rev for this experiment, defaults\n                to the current SCM rev.\n            branch: Optional experiment branch name. If specified, the\n                experiment will be added to `branch` instead of creating\n                a new branch.\n            name: Optional experiment name. If specified this will be used as\n                the human-readable name in the experiment branch ref. Has no\n                effect if branch is specified.\n            no_hydra: Disable Hydra from automatically overwriting all params.\n\n        .. 
_Hydra Override:\n            https://hydra.cc/docs/next/advanced/override_grammar/basic/\n        \"\"\"\n        with self.scm.stash_workspace(reinstate_index=True) as workspace:\n            with self.scm.detach_head(client=\"dvc\") as orig_head:\n                stash_head = orig_head\n                if baseline_rev is None:\n                    baseline_rev = orig_head\n\n                try:\n                    if workspace:\n                        self.stash.apply(workspace)\n\n                    # update experiment params from command line\n                    if params:\n                        self._update_params(params, no_hydra=no_hydra)\n\n                    # DVC commit data deps to preserve state across workspace\n                    # & tempdir runs\n                    self._stash_commit_deps(*args, **kwargs)\n\n                    # save additional repro command line arguments\n                    run_env = {DVC_EXP_BASELINE_REV: baseline_rev}\n                    if not name:\n                        name = get_random_exp_name(self.scm, baseline_rev)\n                    run_env[DVC_EXP_NAME] = name\n                    # Override DVC_ROOT env var to point to the parent DVC repo\n                    # root (and not an executor tempdir root)\n                    run_env[DVC_ROOT] = self.repo.root_dir\n\n                    # save studio config to read later by dvc and dvclive\n                    studio_config = get_studio_config(\n                        dvc_studio_config=self.repo.config.get(\"studio\")\n                    )\n                    run_env = config_to_env(studio_config) | run_env\n                    self._pack_args(*args, run_env=run_env, **kwargs)\n                    # save experiment as a stash commit\n                    msg = self._stash_msg(\n                        stash_head,\n                        baseline_rev=baseline_rev,\n                        branch=branch,\n                        name=name,\n               
     )\n                    stash_rev = self.stash.push(message=msg)\n                    assert stash_rev\n                    logger.debug(\n                        (\n                            \"Stashed experiment '%s' with baseline '%s' \"\n                            \"for future execution.\"\n                        ),\n                        stash_rev[:7],\n                        baseline_rev[:7],\n                    )\n                finally:\n                    # Revert any of our changes before prior unstashing\n                    self.scm.reset(hard=True)\n\n        return QueueEntry(\n            self.repo.root_dir,\n            self.scm.root_dir,\n            self.ref,\n            stash_rev,\n            baseline_rev,\n            branch,\n            name,\n            stash_head,\n        )\n\n    def _stash_commit_deps(self, *args, **kwargs):\n        if args:\n            targets = args[0]\n        else:\n            targets = kwargs.get(\"targets\")\n        if isinstance(targets, str):\n            targets = [targets]\n        elif not targets:\n            targets = [None]\n        for target in targets:\n            self.repo.commit(\n                target,\n                with_deps=True,\n                recursive=kwargs.get(\"recursive\", False),\n                force=True,\n                allow_missing=True,\n                data_only=True,\n                relink=False,\n            )\n\n    @staticmethod\n    def _stash_msg(\n        rev: str,\n        baseline_rev: str,\n        branch: Optional[str] = None,\n        name: Optional[str] = None,\n    ) -> str:\n        if not baseline_rev:\n            baseline_rev = rev\n        msg = ExpStash.format_message(rev, baseline_rev, name)\n        if branch:\n            return f\"{msg}:{branch}\"\n        return msg\n\n    def _pack_args(self, *args, **kwargs) -> None:\n        import pickle\n\n        if os.path.exists(self.args_file) and self.scm.is_tracked(self.args_file):\n    
        logger.warning(\n                (\n                    \"Temporary DVC file '.dvc/tmp/%s' exists and was \"\n                    \"likely committed to Git by mistake. It should be removed \"\n                    \"with:\\n\"\n                    \"\\tgit rm .dvc/tmp/%s\"\n                ),\n                BaseExecutor.PACKED_ARGS_FILE,\n                BaseExecutor.PACKED_ARGS_FILE,\n            )\n            with open(self.args_file, \"rb\") as fobj:\n                try:\n                    data = pickle.load(fobj)  # noqa: S301\n                except Exception:  # noqa: BLE001\n                    data = {}\n            extra = int(data.get(\"extra\", 0)) + 1\n        else:\n            extra = None\n        BaseExecutor.pack_repro_args(self.args_file, *args, extra=extra, **kwargs)\n        self.scm.add(self.args_file, force=True)\n\n    @staticmethod\n    def _format_new_params_msg(new_params, config_path):\n        \"\"\"Format an error message for when new parameters are identified\"\"\"\n        new_param_count = len(new_params)\n        pluralise = \"s are\" if new_param_count > 1 else \" is\"\n        param_list = \", \".join(new_params)\n        return (\n            f\"{new_param_count} parameter{pluralise} missing \"\n            f\"from '{config_path}': {param_list}\"\n        )\n\n    def _update_params(self, params: dict[str, list[str]], no_hydra: bool = False):\n        \"\"\"Update param files with the provided `Hydra Override`_ patterns.\n\n        Args:\n            params: Dict mapping paths to `Hydra Override`_ patterns,\n                provided via `exp run --set-param`.\n            no_hydra: Disable Hydra from automatically overwriting all params.\n\n        .. 
_Hydra Override:\n            https://hydra.cc/docs/advanced/override_grammar/basic/\n        \"\"\"\n        from dvc.utils.hydra import apply_overrides, compose_and_dump\n\n        logger.debug(\"Using experiment params '%s'\", params)\n\n        hydra_config = self.repo.config.get(\"hydra\", {})\n        hydra_enabled = hydra_config.get(\"enabled\", False) and not no_hydra\n        hydra_output_file = ParamsDependency.DEFAULT_PARAMS_FILE\n        for path, overrides in params.items():\n            if hydra_enabled and path == hydra_output_file:\n                if (config_module := hydra_config.get(\"config_module\")) is None:\n                    config_dir = os.path.join(\n                        self.repo.root_dir, hydra_config.get(\"config_dir\", \"conf\")\n                    )\n                else:\n                    config_dir = None\n                config_name = hydra_config.get(\"config_name\", \"config\")\n                plugins_path = os.path.join(\n                    self.repo.root_dir, hydra_config.get(\"plugins_path\", \"\")\n                )\n                compose_and_dump(\n                    path,\n                    config_dir,\n                    config_module,\n                    config_name,\n                    plugins_path,\n                    overrides,\n                )\n            else:\n                apply_overrides(path, overrides)\n\n        # Force params file changes to be staged in git\n        # Otherwise in certain situations the changes to params file may be\n        # ignored when we `git stash` them since mtime is used to determine\n        # whether the file is dirty\n        self.scm.add(list(params.keys()))\n\n    @staticmethod\n    @retry(180, errors=LockError, timeout=1)\n    def get_stash_entry(exp: \"Experiments\", queue_entry: QueueEntry) -> \"ExpStashEntry\":\n        stash = ExpStash(exp.scm, queue_entry.stash_ref)\n        stash_rev = queue_entry.stash_rev\n        with get_exp_rwlock(exp.repo, 
writes=[queue_entry.stash_ref]):\n            stash_entry = stash.stash_revs.get(\n                stash_rev,\n                ExpStashEntry(None, stash_rev, stash_rev, None, None),\n            )\n            if stash_entry.stash_index is not None:\n                stash.drop(stash_entry.stash_index)\n        return stash_entry\n\n    @classmethod\n    def init_executor(\n        cls,\n        exp: \"Experiments\",\n        queue_entry: QueueEntry,\n        executor_cls: type[BaseExecutor] = WorkspaceExecutor,\n        **kwargs,\n    ) -> BaseExecutor:\n        stash_entry = cls.get_stash_entry(exp, queue_entry)\n\n        executor = executor_cls.from_stash_entry(exp.repo, stash_entry, **kwargs)\n\n        stash_rev = queue_entry.stash_rev\n        infofile = exp.celery_queue.get_infofile_path(stash_rev)\n        executor.init_git(\n            exp.repo,\n            exp.repo.scm,\n            stash_rev,\n            stash_entry,\n            infofile,\n            branch=stash_entry.branch,\n        )\n\n        executor.init_cache(exp.repo, stash_rev)\n\n        return executor\n\n    def get_infofile_path(self, name: str) -> str:\n        return os.path.join(\n            self.pid_dir,\n            name,\n            f\"{name}{BaseExecutor.INFOFILE_EXT}\",\n        )\n\n    @staticmethod\n    @retry(180, errors=LockError, timeout=1)\n    def collect_git(\n        exp: \"Experiments\",\n        executor: BaseExecutor,\n        exec_result: \"ExecutorResult\",\n    ) -> dict[str, str]:\n        results = {}\n\n        def on_diverged(ref: str):\n            ref_info = ExpRefInfo.from_ref(ref)\n            raise ExperimentExistsError(ref_info.name)\n\n        refs = get_remote_executor_refs(exp.scm, executor.git_url)\n\n        with get_exp_rwlock(exp.repo, writes=refs):\n            for ref in executor.fetch_exps(\n                exp.scm,\n                refs,\n                force=exec_result.force,\n                on_diverged=on_diverged,\n            ):\n  
              exp_rev = exp.scm.get_ref(ref)\n                if exp_rev:\n                    assert exec_result.exp_hash\n                    logger.debug(\"Collected experiment '%s'.\", exp_rev[:7])\n                    results[exp_rev] = exec_result.exp_hash\n\n        return results\n\n    @classmethod\n    def collect_executor(\n        cls,\n        exp: \"Experiments\",\n        executor: BaseExecutor,\n        exec_result: \"ExecutorResult\",\n    ) -> dict[str, str]:\n        results = cls.collect_git(exp, executor, exec_result)\n\n        if exec_result.ref_info is not None:\n            executor.collect_cache(exp.repo, exec_result.ref_info)\n\n        return results\n\n    def match_queue_entry_by_name(\n        self,\n        exp_names: Collection[str],\n        *entries: Iterable[Union[QueueEntry, QueueDoneResult]],\n    ) -> dict[str, Optional[QueueEntry]]:\n        from funcy import concat\n\n        entry_name_dict: dict[str, QueueEntry] = {}\n        entry_rev_dict: dict[str, QueueEntry] = {}\n        for entry in concat(*entries):\n            if isinstance(entry, QueueDoneResult):\n                queue_entry: QueueEntry = entry.entry\n                if entry.result is not None and entry.result.ref_info is not None:\n                    name: Optional[str] = entry.result.ref_info.name\n                else:\n                    name = queue_entry.name\n            else:\n                queue_entry = entry\n                name = queue_entry.name\n            if name:\n                entry_name_dict[name] = queue_entry\n            entry_rev_dict[queue_entry.stash_rev] = queue_entry\n\n        result: dict[str, Optional[QueueEntry]] = {}\n        for exp_name in exp_names:\n            result[exp_name] = None\n            if exp_name in entry_name_dict:\n                result[exp_name] = entry_name_dict[exp_name]\n                continue\n            if self.scm.is_sha(exp_name):\n                for rev, entry in entry_rev_dict.items():\n   
                 if rev.startswith(exp_name.lower()):\n                        result[exp_name] = entry\n                        break\n\n        return result\n\n    def stash_failed(self, entry: QueueEntry) -> None:\n        \"\"\"Add an entry to the failed exp stash.\n\n        Arguments:\n            entry: Failed queue entry to add. ``entry.stash_rev`` must be a\n                valid Git stash commit.\n        \"\"\"\n        if self.failed_stash is not None:\n            assert entry.head_rev\n            logger.debug(\"Stashing failed exp '%s'\", entry.stash_rev[:7])\n            msg = self.failed_stash.format_message(\n                entry.head_rev,\n                baseline_rev=entry.baseline_rev,\n                name=entry.name,\n                branch=entry.branch,\n            )\n            self.scm.set_ref(\n                self.failed_stash.ref,\n                entry.stash_rev,\n                message=f\"commit: {msg}\",\n            )\n\n    @abstractmethod\n    def collect_active_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        fetch_refs: bool = False,\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        \"\"\"Collect data for active (running) experiments.\n\n        Args:\n            baseline_revs: Optional resolved baseline Git SHAs. If set, only experiments\n                derived from the specified revisions will be collected. 
Defaults to\n                collecting all experiments.\n            fetch_refs: Whether or not to fetch completed checkpoint commits from Git\n                remote.\n\n        Returns:\n            Dict mapping baseline revision to list of active experiments.\n        \"\"\"\n\n    @abstractmethod\n    def collect_queued_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        \"\"\"Collect data for queued experiments.\n\n        Args:\n            baseline_revs: Optional resolved baseline Git SHAs. If set, only experiments\n                derived from the specified revisions will be collected. Defaults to\n                collecting all experiments.\n\n        Returns:\n            Dict mapping baseline revision to list of queued experiments.\n        \"\"\"\n\n    @abstractmethod\n    def collect_failed_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        \"\"\"Collect data for failed experiments.\n\n        Args:\n            baseline_revs: Optional resolved baseline Git SHAs. If set, only experiments\n                derived from the specified revisions will be collected. 
Defaults to\n                collecting all experiments.\n\n        Returns:\n            Dict mapping baseline revision to list of failed experiments.\n        \"\"\"\n\n    def active_repo(self, name: str) -> \"Repo\":\n        \"\"\"Return a Repo for the specified active experiment if it exists.\"\"\"\n        from dvc.exceptions import DvcException\n        from dvc.repo import Repo\n        from dvc.repo.experiments.exceptions import (\n            ExpNotStartedError,\n            InvalidExpRevError,\n        )\n        from dvc.repo.experiments.executor.base import ExecutorInfo, TaskStatus\n\n        for entry in self.iter_active():\n            if entry.name != name:\n                continue\n            infofile = self.get_infofile_path(entry.stash_rev)\n            executor_info = ExecutorInfo.load_json(infofile)\n            if executor_info.status < TaskStatus.RUNNING:\n                raise ExpNotStartedError(name)\n            dvc_root = os.path.join(executor_info.root_dir, executor_info.dvc_dir)\n            try:\n                return Repo(dvc_root)\n            except (FileNotFoundError, DvcException) as exc:\n                raise InvalidExpRevError(name) from exc\n        raise InvalidExpRevError(name)\n"
  },
  {
    "path": "dvc/repo/experiments/queue/celery.py",
    "content": "import glob\nimport hashlib\nimport locale\nimport logging\nimport os\nfrom collections import defaultdict\nfrom collections.abc import Collection, Generator, Mapping\nfrom typing import TYPE_CHECKING, NamedTuple, Optional, Union\n\nfrom celery.result import AsyncResult\nfrom funcy import first\n\nfrom dvc.daemon import daemonize\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.repo.experiments.exceptions import (\n    UnresolvedQueueExpNamesError,\n    UnresolvedRunningExpNamesError,\n)\nfrom dvc.repo.experiments.executor.base import ExecutorInfo\nfrom dvc.repo.experiments.refs import CELERY_STASH\nfrom dvc.repo.experiments.utils import EXEC_TMP_DIR, get_exp_rwlock\nfrom dvc.ui import ui\nfrom dvc.utils.objects import cached_property\n\nfrom .base import BaseStashQueue, ExpRefAndQueueEntry, QueueDoneResult, QueueEntry\nfrom .exceptions import CannotKillTasksError\nfrom .tasks import run_exp\nfrom .utils import fetch_running_exp_from_temp_dir\n\nif TYPE_CHECKING:\n    from kombu.message import Message\n\n    from dvc.repo.experiments.executor.base import ExecutorResult\n    from dvc.repo.experiments.refs import ExpRefInfo\n    from dvc.repo.experiments.serialize import ExpExecutor, ExpRange\n    from dvc_task.app import FSApp\n    from dvc_task.proc.manager import ProcessManager\n    from dvc_task.worker import TemporaryWorker\n\n    from .base import QueueGetResult\n\nlogger = logger.getChild(__name__)\n\n\nclass _MessageEntry(NamedTuple):\n    msg: \"Message\"\n    entry: QueueEntry\n\n\nclass _TaskEntry(NamedTuple):\n    async_result: AsyncResult\n    entry: QueueEntry\n\n\nclass LocalCeleryQueue(BaseStashQueue):\n    \"\"\"DVC experiment queue.\n\n    Maps queued experiments to (Git) stash reflog entries.\n    \"\"\"\n\n    CELERY_DIR = \"celery\"\n\n    @cached_property\n    def wdir(self) -> str:\n        assert self.repo.tmp_dir is not None\n        return os.path.join(self.repo.tmp_dir, EXEC_TMP_DIR, 
self.CELERY_DIR)\n\n    @cached_property\n    def celery(self) -> \"FSApp\":\n        from kombu.transport.filesystem import Channel\n\n        # related to https://github.com/treeverse/dvc-task/issues/61\n        Channel.QoS.restore_at_shutdown = False\n\n        from dvc_task.app import FSApp\n\n        app = FSApp(\n            \"dvc-exp-local\",\n            wdir=self.wdir,\n            mkdir=True,\n            include=[\"dvc.repo.experiments.queue.tasks\", \"dvc_task.proc.tasks\"],\n        )\n        app.conf.update({\"task_acks_late\": True, \"result_expires\": None})\n        return app\n\n    @cached_property\n    def proc(self) -> \"ProcessManager\":\n        from dvc_task.proc.manager import ProcessManager\n\n        return ProcessManager(self.pid_dir)\n\n    @cached_property\n    def worker(self) -> \"TemporaryWorker\":\n        from dvc_task.worker import TemporaryWorker\n\n        # NOTE: Use thread pool with concurrency 1 and disabled prefetch.\n        # Worker scaling should be handled by running additional workers,\n        # rather than increasing pool concurrency.\n        #\n        # We use \"threads\" over \"solo\" (inline single-threaded) execution so\n        # that we still have access to the control/broadcast API (which\n        # requires a separate message handling thread in the worker).\n        #\n        # Disabled prefetch ensures that each worker can only schedule and\n        # execute up to one experiment at a time (and a worker cannot prefetch\n        # additional experiments from the queue).\n        return TemporaryWorker(\n            self.celery,\n            pool=\"threads\",\n            concurrency=1,\n            prefetch_multiplier=1,\n            without_heartbeat=True,\n            without_mingle=True,\n            without_gossip=True,\n            timeout=10,\n            loglevel=\"debug\" if logger.getEffectiveLevel() <= logging.DEBUG else \"info\",\n        )\n\n    def _spawn_worker(self, num: int = 1):\n   
     \"\"\"spawn a single worker to process queued tasks.\n\n        Argument:\n            num: serial number of the worker.\n\n        \"\"\"\n        from dvc_task.proc.process import ManagedProcess\n\n        logger.debug(\"Spawning exp queue worker\")\n        wdir_hash = hashlib.sha256(self.wdir.encode(\"utf-8\")).hexdigest()[:6]\n        node_name = f\"dvc-exp-{wdir_hash}-{num}@localhost\"\n        cmd = [\"exp\", \"queue-worker\", node_name]\n        if num == 1:\n            # automatically run celery cleanup when primary worker shuts down\n            cmd.append(\"--clean\")\n        if logger.getEffectiveLevel() <= logging.DEBUG:\n            cmd.append(\"-v\")\n        name = f\"dvc-exp-worker-{num}\"\n\n        logger.debug(\"start a new worker: %s, node: %s\", name, node_name)\n        if os.name == \"nt\":\n            daemonize(cmd)\n        else:\n            ManagedProcess.spawn([\"dvc\", *cmd], wdir=self.wdir, name=name)\n\n    def start_workers(self, count: int) -> int:\n        \"\"\"start some workers to process the queued tasks.\n\n        Argument:\n            count: number of workers to start.\n\n        Returns:\n            number of newly spawned workers.\n        \"\"\"\n\n        logger.debug(\"Spawning %s exp queue workers\", count)\n        active_worker: dict = self.worker_status()\n\n        started = 0\n        for num in range(1, 1 + count):\n            wdir_hash = hashlib.sha256(self.wdir.encode(\"utf-8\")).hexdigest()[:6]\n            node_name = f\"dvc-exp-{wdir_hash}-{num}@localhost\"\n            if node_name in active_worker:\n                logger.debug(\"Exp queue worker %s already exist\", node_name)\n                continue\n            self._spawn_worker(num)\n            started += 1\n\n        return started\n\n    def put(\n        self,\n        *args,\n        copy_paths: Optional[list[str]] = None,\n        message: Optional[str] = None,\n        **kwargs,\n    ) -> QueueEntry:\n        \"\"\"Stash an 
experiment and add it to the queue.\"\"\"\n        with get_exp_rwlock(self.repo, writes=[\"workspace\", CELERY_STASH]):\n            entry = self._stash_exp(*args, **kwargs)\n        self.celery.signature(\n            run_exp.s(entry.asdict(), copy_paths=copy_paths, message=message)\n        ).delay()\n        return entry\n\n    # NOTE: Queue consumption should not be done directly. Celery worker(s)\n    # will automatically consume available experiments.\n    def get(self) -> \"QueueGetResult\":\n        raise NotImplementedError\n\n    def iter_queued(self) -> Generator[QueueEntry, None, None]:\n        for _, entry in self._iter_queued():\n            yield entry\n\n    def _iter_queued(self) -> Generator[_MessageEntry, None, None]:\n        for msg in self.celery.iter_queued():\n            if msg.headers.get(\"task\") != run_exp.name:\n                continue\n            args, kwargs, _embed = msg.decode()\n            entry_dict = kwargs.get(\"entry_dict\", args[0])\n            logger.trace(\"Found queued task %s\", entry_dict[\"stash_rev\"])\n            yield _MessageEntry(msg, QueueEntry.from_dict(entry_dict))\n\n    def _iter_processed(self) -> Generator[_MessageEntry, None, None]:\n        for msg in self.celery.iter_processed():\n            if msg.headers.get(\"task\") != run_exp.name:\n                continue\n            args, kwargs, _embed = msg.decode()\n            entry_dict = kwargs.get(\"entry_dict\", args[0])\n            yield _MessageEntry(msg, QueueEntry.from_dict(entry_dict))\n\n    def _iter_active_tasks(self) -> Generator[_TaskEntry, None, None]:\n        for msg, entry in self._iter_processed():\n            task_id = msg.headers[\"id\"]\n            result: AsyncResult = AsyncResult(task_id)\n            if not result.ready():\n                logger.trace(\"Found active task %s\", entry.stash_rev)\n                yield _TaskEntry(result, entry)\n\n    def _iter_done_tasks(self) -> Generator[_TaskEntry, None, None]:\n        
for msg, entry in self._iter_processed():\n            task_id = msg.headers[\"id\"]\n            result: AsyncResult = AsyncResult(task_id)\n            if result.ready():\n                logger.trace(\"Found done task %s\", entry.stash_rev)\n                yield _TaskEntry(result, entry)\n\n    def iter_active(self) -> Generator[QueueEntry, None, None]:\n        for _, entry in self._iter_active_tasks():\n            yield entry\n\n    def iter_done(self) -> Generator[QueueDoneResult, None, None]:\n        for result, entry in self._iter_done_tasks():\n            try:\n                exp_result = self.get_result(entry)\n            except FileNotFoundError:\n                if result.status == \"SUCCESS\":\n                    raise DvcException(  # noqa: B904\n                        f\"Invalid experiment '{entry.stash_rev[:7]}'.\"\n                    )\n                if result.status == \"FAILURE\":\n                    exp_result = None\n            yield QueueDoneResult(entry, exp_result)\n\n    def iter_success(self) -> Generator[QueueDoneResult, None, None]:\n        for queue_entry, exp_result in self.iter_done():\n            if exp_result and exp_result.exp_hash and exp_result.ref_info:\n                yield QueueDoneResult(queue_entry, exp_result)\n\n    def iter_failed(self) -> Generator[QueueDoneResult, None, None]:\n        for queue_entry, exp_result in self.iter_done():\n            if exp_result is None:\n                yield QueueDoneResult(queue_entry, exp_result)\n\n    def reproduce(\n        self, copy_paths: Optional[list[str]] = None, message: Optional[str] = None\n    ) -> Mapping[str, Mapping[str, str]]:\n        raise NotImplementedError\n\n    def _load_info(self, rev: str) -> ExecutorInfo:\n        infofile = self.get_infofile_path(rev)\n        return ExecutorInfo.load_json(infofile)\n\n    def _get_done_result(\n        self, entry: QueueEntry, timeout: Optional[float] = None\n    ) -> Optional[\"ExecutorResult\"]:\n        
from celery.exceptions import TimeoutError as _CeleryTimeout\n\n        for msg, processed_entry in self._iter_processed():\n            if entry.stash_rev == processed_entry.stash_rev:\n                task_id = msg.headers[\"id\"]\n                result: AsyncResult = AsyncResult(task_id)\n                if not result.ready():\n                    logger.debug(\"Waiting for exp task '%s' to complete\", result.id)\n                    try:\n                        result.get(timeout=timeout)\n                    except _CeleryTimeout as exc:\n                        raise DvcException(\n                            \"Timed out waiting for exp to finish.\"\n                        ) from exc\n                executor_info = self._load_info(entry.stash_rev)\n                return executor_info.result\n        raise FileNotFoundError\n\n    def get_result(\n        self, entry: QueueEntry, timeout: Optional[float] = None\n    ) -> Optional[\"ExecutorResult\"]:\n        try:\n            return self._get_done_result(entry, timeout)\n        except FileNotFoundError:\n            pass\n\n        for queue_entry in self.iter_queued():\n            if entry.stash_rev == queue_entry.stash_rev:\n                raise DvcException(\"Experiment has not been started.\")\n\n        # NOTE: It's possible for an exp to complete while iterating through\n        # other queued and active tasks, in which case the exp will get moved\n        # out of the active task list, and needs to be loaded here.\n        return self._get_done_result(entry, timeout)\n\n    def wait(self, revs: Collection[str], **kwargs) -> None:\n        \"\"\"Block until the specified tasks have completed.\"\"\"\n        revs = [revs] if isinstance(revs, str) else revs\n        results = self.match_queue_entry_by_name(\n            revs, self.iter_queued(), self.iter_done(), self.iter_failed()\n        )\n        for entry in results.values():\n            if not entry:\n                continue\n            
self.wait_for_start(entry, **kwargs)\n            try:\n                self.get_result(entry)\n            except FileNotFoundError:\n                pass\n\n    def wait_for_start(self, entry: QueueEntry, sleep_interval: float = 0.001) -> None:\n        \"\"\"Block until the specified task has been started.\"\"\"\n        import time\n\n        while not self.proc.get(entry.stash_rev):\n            time.sleep(sleep_interval)\n\n    def _get_running_task_ids(self) -> set[str]:\n        running_task_ids: set[str] = set()\n        active_workers = self.worker_status()\n        for tasks in active_workers.values():\n            task = first(tasks)\n            if task:\n                running_task_ids.add(task[\"id\"])\n        return running_task_ids\n\n    def _try_to_kill_tasks(\n        self, to_kill: dict[QueueEntry, str], force: bool\n    ) -> dict[QueueEntry, str]:\n        fail_to_kill_entries: dict[QueueEntry, str] = {}\n        for queue_entry, rev in to_kill.items():\n            try:\n                if force:\n                    self.proc.kill(queue_entry.stash_rev)\n                else:\n                    self.proc.interrupt(queue_entry.stash_rev)\n                ui.write(f\"{rev} has been killed.\")\n            except ProcessLookupError:\n                fail_to_kill_entries[queue_entry] = rev\n        return fail_to_kill_entries\n\n    def _mark_inactive_tasks_failure(\n        self, remained_entries: dict[QueueEntry, str]\n    ) -> None:\n        remained_revs: list[str] = []\n        running_ids = self._get_running_task_ids()\n        logger.debug(\"Current running tasks ids: %s.\", running_ids)\n        for msg, entry in self._iter_processed():\n            if entry not in remained_entries:\n                continue\n            task_id = msg.headers[\"id\"]\n            if task_id in running_ids:\n                remained_revs.append(remained_entries[entry])\n            else:\n                result: AsyncResult = AsyncResult(task_id)\n    
            if not result.ready():\n                    logger.debug(\n                        \"Task id %s rev %s marked as failure.\",\n                        task_id,\n                        remained_entries[entry],\n                    )\n                    backend = self.celery.backend\n                    backend.mark_as_failure(task_id, None)  # type: ignore[attr-defined]\n\n        if remained_revs:\n            raise CannotKillTasksError(remained_revs)\n\n    def _kill_entries(self, entries: dict[QueueEntry, str], force: bool) -> None:\n        logger.debug(\"Found active tasks: '%s' to kill\", list(entries.values()))\n        inactive_entries: dict[QueueEntry, str] = self._try_to_kill_tasks(\n            entries, force\n        )\n\n        if inactive_entries:\n            self._mark_inactive_tasks_failure(inactive_entries)\n\n    def kill(self, revs: Collection[str], force: bool = False) -> None:\n        name_dict: dict[str, Optional[QueueEntry]] = self.match_queue_entry_by_name(\n            set(revs), self.iter_active()\n        )\n\n        missing_revs: list[str] = []\n        to_kill: dict[QueueEntry, str] = {}\n        for rev, queue_entry in name_dict.items():\n            if queue_entry is None:\n                missing_revs.append(rev)\n            else:\n                to_kill[queue_entry] = rev\n\n        if to_kill:\n            self._kill_entries(to_kill, force)\n\n        if missing_revs:\n            raise UnresolvedRunningExpNamesError(missing_revs)\n\n    def shutdown(self, kill: bool = False):\n        self.celery.control.shutdown()\n        if kill:\n            to_kill: dict[QueueEntry, str] = {}\n            for entry in self.iter_active():\n                to_kill[entry] = entry.name or entry.stash_rev\n            if to_kill:\n                self._kill_entries(to_kill, True)\n\n    def follow(self, entry: QueueEntry, encoding: Optional[str] = None):\n        for line in self.proc.follow(entry.stash_rev, encoding):\n          
  ui.write(line, end=\"\")\n\n    def logs(self, rev: str, encoding: Optional[str] = None, follow: bool = False):\n        queue_entry: Optional[QueueEntry] = self.match_queue_entry_by_name(\n            {rev}, self.iter_active(), self.iter_done()\n        ).get(rev)\n        if queue_entry is None:\n            if self.match_queue_entry_by_name({rev}, self.iter_queued()).get(rev):\n                raise DvcException(\n                    f\"Experiment '{rev}' is in queue but has not been started\"\n                )\n            raise UnresolvedQueueExpNamesError([rev])\n        if follow:\n            ui.write(\n                f\"Following logs for experiment '{rev}'. Use Ctrl+C to stop \"\n                \"following logs (experiment execution will continue).\\n\"\n            )\n            try:\n                self.follow(queue_entry)\n            except KeyboardInterrupt:\n                pass\n            return\n        try:\n            proc_info = self.proc[queue_entry.stash_rev]\n        except KeyError:\n            message = f\"No output logs found for experiment '{rev}'\"\n            if self.match_queue_entry_by_name({rev}, self.iter_failed()).get(rev):\n                message += \"\\nExperiment likely failed during setup.\"\n                if celery_logs := glob.glob(os.path.join(self.wdir, \"*.out\")):\n                    message += \" Check the celery logs for more details:\"\n                    message += f\"\\n\\tcat {' '.join(celery_logs)}\".expandtabs(4)\n            raise DvcException(message)  # noqa: B904\n        with open(\n            proc_info.stdout, encoding=encoding or locale.getpreferredencoding()\n        ) as fobj:\n            ui.write(fobj.read())\n\n    def worker_status(self) -> dict[str, list[dict]]:\n        \"\"\"Return the current active celery worker\"\"\"\n        status = self.celery.control.inspect().active() or {}\n        logger.debug(\"Worker status: %s\", status)\n        return status\n\n    def clear(self, 
*args, **kwargs):\n        from .remove import celery_clear\n\n        return celery_clear(self, *args, **kwargs)\n\n    def remove(self, *args, **kwargs):\n        from .remove import celery_remove\n\n        return celery_remove(self, *args, **kwargs)\n\n    def get_ref_and_entry_by_names(\n        self,\n        exp_names: Union[str, list[str]],\n        git_remote: Optional[str] = None,\n    ) -> dict[str, ExpRefAndQueueEntry]:\n        \"\"\"Find finished ExpRefInfo or queued or failed QueueEntry by name\"\"\"\n        from dvc.repo.experiments.utils import resolve_name\n\n        if isinstance(exp_names, str):\n            exp_names = [exp_names]\n        results: dict[str, ExpRefAndQueueEntry] = {}\n\n        exp_ref_match: dict[str, Optional[ExpRefInfo]] = resolve_name(\n            self.scm, exp_names, git_remote\n        )\n        if not git_remote:\n            queue_entry_match: dict[str, Optional[QueueEntry]] = (\n                self.match_queue_entry_by_name(\n                    exp_names, self.iter_queued(), self.iter_done()\n                )\n            )\n\n        for exp_name in exp_names:\n            exp_ref = exp_ref_match[exp_name]\n            queue_entry = None if git_remote else queue_entry_match[exp_name]\n            results[exp_name] = ExpRefAndQueueEntry(exp_ref, queue_entry)\n        return results\n\n    def collect_active_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        fetch_refs: bool = False,\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        from dvc.repo import Repo\n        from dvc.repo.experiments.collect import collect_exec_branch\n        from dvc.repo.experiments.serialize import (\n            ExpExecutor,\n            ExpRange,\n            LocalExpExecutor,\n        )\n\n        result: dict[str, list[ExpRange]] = defaultdict(list)\n        for entry in self.iter_active():\n            if baseline_revs and entry.baseline_rev not in baseline_revs:\n                
continue\n            if fetch_refs:\n                fetch_running_exp_from_temp_dir(self, entry.stash_rev, fetch_refs)\n            proc_info = self.proc.get(entry.stash_rev)\n            executor_info = self._load_info(entry.stash_rev)\n            if proc_info:\n                local_exec: Optional[LocalExpExecutor] = LocalExpExecutor(\n                    root=executor_info.root_dir,\n                    log=proc_info.stdout,\n                    pid=proc_info.pid,\n                    task_id=entry.stash_rev,\n                )\n            else:\n                local_exec = None\n            dvc_root = os.path.join(executor_info.root_dir, executor_info.dvc_dir)\n            with Repo(dvc_root) as exec_repo:\n                kwargs[\"cache\"] = self.repo.experiments.cache\n                exps = list(\n                    collect_exec_branch(exec_repo, executor_info.baseline_rev, **kwargs)\n                )\n            exps[0].rev = entry.stash_rev\n            exps[0].name = entry.name\n            result[entry.baseline_rev].append(\n                ExpRange(\n                    exps,\n                    executor=ExpExecutor(\n                        \"running\",\n                        name=executor_info.location,\n                        local=local_exec,\n                    ),\n                    name=entry.name,\n                )\n            )\n        return result\n\n    def collect_queued_data(\n        self, baseline_revs: Optional[Collection[str]], **kwargs\n    ) -> dict[str, list[\"ExpRange\"]]:\n        from dvc.repo.experiments.collect import collect_rev\n        from dvc.repo.experiments.serialize import (\n            ExpExecutor,\n            ExpRange,\n            LocalExpExecutor,\n        )\n\n        result: dict[str, list[ExpRange]] = defaultdict(list)\n        for entry in self.iter_queued():\n            if baseline_revs and entry.baseline_rev not in baseline_revs:\n                continue\n            exp = 
collect_rev(self.repo, entry.stash_rev, **kwargs)\n            exp.name = entry.name\n            local_exec: Optional[LocalExpExecutor] = LocalExpExecutor(\n                task_id=entry.stash_rev,\n            )\n            result[entry.baseline_rev].append(\n                ExpRange(\n                    [exp],\n                    executor=ExpExecutor(\"queued\", name=\"dvc-task\", local=local_exec),\n                    name=entry.name,\n                )\n            )\n        return result\n\n    def collect_failed_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        from dvc.repo.experiments.collect import collect_rev\n        from dvc.repo.experiments.serialize import (\n            ExpExecutor,\n            ExpRange,\n            LocalExpExecutor,\n            SerializableError,\n        )\n\n        result: dict[str, list[ExpRange]] = defaultdict(list)\n        for entry, _ in self.iter_failed():\n            if baseline_revs and entry.baseline_rev not in baseline_revs:\n                continue\n            proc_info = self.proc.get(entry.stash_rev)\n            if proc_info:\n                local_exec: Optional[LocalExpExecutor] = LocalExpExecutor(\n                    log=proc_info.stdout,\n                    pid=proc_info.pid,\n                    returncode=proc_info.returncode,\n                    task_id=entry.stash_rev,\n                )\n            else:\n                local_exec = None\n            exp = collect_rev(self.repo, entry.stash_rev, **kwargs)\n            exp.name = entry.name\n            exp.error = SerializableError(\"Experiment run failed\")\n            result[entry.baseline_rev].append(\n                ExpRange(\n                    [exp],\n                    executor=ExpExecutor(\"failed\", local=local_exec),\n                    name=entry.name,\n                )\n            )\n        return result\n\n    def 
collect_success_executors(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        **kwargs,\n    ) -> dict[str, \"ExpExecutor\"]:\n        \"\"\"Map exp refs to any available successful executors.\"\"\"\n        from dvc.repo.experiments.serialize import ExpExecutor, LocalExpExecutor\n\n        result: dict[str, ExpExecutor] = {}\n        for entry, exec_result in self.iter_success():\n            if baseline_revs and entry.baseline_rev not in baseline_revs:\n                continue\n            if not (exec_result and exec_result.ref_info):\n                continue\n            proc_info = self.proc.get(entry.stash_rev)\n            if proc_info:\n                local_exec: Optional[LocalExpExecutor] = LocalExpExecutor(\n                    log=proc_info.stdout,\n                    pid=proc_info.pid,\n                    returncode=proc_info.returncode,\n                    task_id=entry.stash_rev,\n                )\n            else:\n                local_exec = None\n            result[str(exec_result.ref_info)] = ExpExecutor(\n                \"success\", name=\"dvc-task\", local=local_exec\n            )\n        return result\n"
  },
  {
    "path": "dvc/repo/experiments/queue/exceptions.py",
    "content": "from collections.abc import Collection\n\nfrom dvc.exceptions import DvcException\n\n\nclass CannotKillTasksError(DvcException):\n    def __init__(self, revs: Collection[str]):\n        rev_str = \",\".join(revs)\n        super().__init__(\n            f\"Task '{rev_str}' is initializing, please wait a few seconds \"\n            \"until the experiments start running to retry the kill operation.\"\n        )\n"
  },
  {
    "path": "dvc/repo/experiments/queue/remove.py",
    "content": "from collections.abc import Collection, Iterable\nfrom typing import TYPE_CHECKING, Union\n\nfrom dvc.repo.experiments.exceptions import UnresolvedExpNamesError\nfrom dvc.repo.experiments.queue.base import QueueDoneResult\n\nif TYPE_CHECKING:\n    from dvc.repo.experiments.queue.base import QueueEntry\n    from dvc.repo.experiments.queue.celery import LocalCeleryQueue\n    from dvc.repo.experiments.stash import ExpStashEntry\n\n\ndef remove_tasks(  # noqa: C901, PLR0912\n    celery_queue: \"LocalCeleryQueue\",\n    queue_entries: Iterable[\"QueueEntry\"],\n):\n    \"\"\"Remove tasks from task queue.\n\n    Arguments:\n        queue_entries: An iterable list of task to remove\n    \"\"\"\n    from celery.result import AsyncResult\n\n    stash_revs: dict[str, ExpStashEntry] = {}\n    failed_stash_revs: list[ExpStashEntry] = []\n    done_entry_set: set[QueueEntry] = set()\n    stash_rev_all = celery_queue.stash.stash_revs\n    failed_rev_all: dict[str, ExpStashEntry] = {}\n    if celery_queue.failed_stash:\n        failed_rev_all = celery_queue.failed_stash.stash_revs\n    for entry in queue_entries:\n        if entry.stash_rev in stash_rev_all:\n            stash_revs[entry.stash_rev] = stash_rev_all[entry.stash_rev]\n        else:\n            done_entry_set.add(entry)\n            if entry.stash_rev in failed_rev_all:\n                failed_stash_revs.append(failed_rev_all[entry.stash_rev])\n\n    try:\n        for msg, queue_entry in celery_queue._iter_queued():\n            if queue_entry.stash_rev in stash_revs and msg.delivery_tag:\n                celery_queue.celery.reject(msg.delivery_tag)\n    finally:\n        celery_queue.stash.remove_revs(list(stash_revs.values()))\n\n    try:\n        for msg, queue_entry in celery_queue._iter_processed():\n            if queue_entry not in done_entry_set:\n                continue\n            task_id = msg.headers[\"id\"]\n            result: AsyncResult = AsyncResult(task_id)\n            if result 
is not None:\n                result.forget()\n            if msg.delivery_tag:\n                celery_queue.celery.purge(msg.delivery_tag)\n    finally:\n        if celery_queue.failed_stash:\n            celery_queue.failed_stash.remove_revs(failed_stash_revs)\n\n\ndef _get_names(entries: Iterable[Union[\"QueueEntry\", \"QueueDoneResult\"]]):\n    names: list[str] = []\n    for entry in entries:\n        if isinstance(entry, QueueDoneResult):\n            if entry.result and entry.result.ref_info:\n                names.append(entry.result.ref_info.name)\n                continue\n            entry = entry.entry\n        name = entry.name\n        name = name or entry.stash_rev[:7]\n        names.append(name)\n    return names\n\n\ndef celery_clear(\n    self: \"LocalCeleryQueue\",\n    queued: bool = False,\n    failed: bool = False,\n    success: bool = False,\n) -> list[str]:\n    \"\"\"Remove entries from the queue.\n\n    Arguments:\n        queued: Remove all queued tasks.\n        failed: Remove all failed tasks.\n        success: Remove all success tasks.\n\n    Returns:\n        Revisions which were removed.\n    \"\"\"\n\n    removed: list[str] = []\n    entry_list: list[QueueEntry] = []\n    if queued:\n        queue_entries: list[QueueEntry] = list(self.iter_queued())\n        entry_list.extend(queue_entries)\n        removed.extend(_get_names(queue_entries))\n    if failed:\n        failed_tasks: list[QueueDoneResult] = list(self.iter_failed())\n        entry_list.extend([result.entry for result in failed_tasks])\n        removed.extend(_get_names(failed_tasks))\n    if success:\n        success_tasks: list[QueueDoneResult] = list(self.iter_success())\n        entry_list.extend([result.entry for result in success_tasks])\n        removed.extend(_get_names(success_tasks))\n\n    remove_tasks(self, entry_list)\n\n    return removed\n\n\ndef celery_remove(self: \"LocalCeleryQueue\", revs: Collection[str]) -> list[str]:\n    \"\"\"Remove the specified 
entries from the queue.\n\n    Arguments:\n        revs: Stash revisions or queued exp names to be removed.\n\n    Returns:\n        Revisions (or names) which were removed.\n    \"\"\"\n\n    match_results = self.match_queue_entry_by_name(\n        revs, self.iter_queued(), self.iter_done()\n    )\n\n    remained: list[str] = []\n    removed: list[str] = []\n    entry_to_remove: list[QueueEntry] = []\n    for name, entry in match_results.items():\n        if entry:\n            entry_to_remove.append(entry)\n            removed.append(name)\n        else:\n            remained.append(name)\n\n    if remained:\n        raise UnresolvedExpNamesError(remained)\n\n    if entry_to_remove:\n        remove_tasks(self, entry_to_remove)\n\n    return removed\n"
  },
  {
    "path": "dvc/repo/experiments/queue/tasks.py",
    "content": "from typing import TYPE_CHECKING, Any, Optional\n\nfrom celery import shared_task\nfrom celery.utils.log import get_task_logger\n\nfrom dvc.repo.experiments.executor.base import ExecutorInfo\nfrom dvc.repo.experiments.executor.local import TempDirExecutor\n\nfrom .base import BaseStashQueue, QueueEntry\n\nif TYPE_CHECKING:\n    from dvc.repo.experiments.executor.base import BaseExecutor\n\n\nlogger = get_task_logger(__name__)\n\n\n@shared_task\ndef setup_exp(entry_dict: dict[str, Any]) -> \"BaseExecutor\":\n    \"\"\"Setup an experiment.\n\n    Arguments:\n        entry_dict: Serialized QueueEntry for this experiment.\n\n    Returns:\n        Root executor (temp) directory for this experiment.\n    \"\"\"\n    from dvc.repo import Repo\n\n    entry = QueueEntry.from_dict(entry_dict)\n    with Repo(entry.dvc_root) as repo:\n        # TODO: split executor.init_cache into separate subtask - we can release\n        # exp.scm_lock before DVC push\n        executor = BaseStashQueue.init_executor(\n            repo.experiments,\n            entry,\n            TempDirExecutor,\n            location=\"dvc-task\",\n        )\n        infofile = repo.experiments.celery_queue.get_infofile_path(entry.stash_rev)\n        executor.info.dump_json(infofile)\n    return executor\n\n\n@shared_task\ndef collect_exp(\n    proc_dict: dict[str, Any],  # noqa: ARG001\n    entry_dict: dict[str, Any],\n) -> str:\n    \"\"\"Collect results for an experiment.\n\n    Arguments:\n        proc_dict: Serialized ProcessInfo for experiment executor process.\n        entry_dict: Serialized QueueEntry for this experiment.\n\n    Returns:\n        Directory to be cleaned up after this experiment.\n    \"\"\"\n    from dvc.repo import Repo\n\n    entry = QueueEntry.from_dict(entry_dict)\n    with Repo(entry.dvc_root) as repo:\n        celery_queue = repo.experiments.celery_queue\n        infofile = celery_queue.get_infofile_path(entry.stash_rev)\n        executor_info = 
ExecutorInfo.load_json(infofile)\n        logger.debug(\"Collecting experiment info '%s'\", str(executor_info))\n        executor = TempDirExecutor.from_info(executor_info)\n        exec_result = executor_info.result\n        try:\n            if exec_result is not None:\n                BaseStashQueue.collect_executor(repo.experiments, executor, exec_result)\n            else:\n                logger.debug(\"Experiment failed (Exec result was None)\")\n                celery_queue.stash_failed(entry)\n        except Exception:\n            # Log exceptions but do not re-raise so that task chain execution\n            # continues\n            logger.exception(\"Failed to collect experiment\")\n    return executor.root_dir\n\n\n@shared_task\ndef cleanup_exp(executor: TempDirExecutor, infofile: str) -> None:\n    \"\"\"Cleanup after an experiment.\n\n    Arguments:\n        tmp_dir: Temp directory to be removed.\n        entry_dict: Serialized QueueEntry for this experiment.\n    \"\"\"\n    executor.cleanup(infofile)\n\n\n@shared_task\ndef run_exp(\n    entry_dict: dict[str, Any],\n    copy_paths: Optional[list[str]] = None,\n    message: Optional[str] = None,\n) -> None:\n    \"\"\"Run a full experiment.\n\n    Experiment subtasks are executed inline as one atomic operation.\n\n    Arguments:\n        entry_dict: Serialized QueueEntry for this experiment.\n    \"\"\"\n    from dvc.repo import Repo\n\n    entry = QueueEntry.from_dict(entry_dict)\n    with Repo(entry.dvc_root) as repo:\n        queue = repo.experiments.celery_queue\n        infofile = queue.get_infofile_path(entry.stash_rev)\n    executor = setup_exp.s(entry_dict)()\n    try:\n        cmd = [\"dvc\", \"exp\", \"exec-run\", \"--infofile\", infofile]\n        if copy_paths:\n            for path in copy_paths:\n                cmd.extend([\"--copy-paths\", path])\n        if message:\n            cmd.extend([\"--message\", message])\n        proc_dict = queue.proc.run_signature(cmd, 
name=entry.stash_rev)()\n        collect_exp.s(proc_dict, entry_dict)()\n    finally:\n        cleanup_exp.s(executor, infofile)()\n"
  },
  {
    "path": "dvc/repo/experiments/queue/tempdir.py",
    "content": "import os\nfrom collections import defaultdict\nfrom collections.abc import Collection, Generator\nfrom typing import TYPE_CHECKING, Optional\n\nfrom funcy import first\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.repo.experiments.exceptions import ExpQueueEmptyError\nfrom dvc.repo.experiments.executor.base import ExecutorInfo, TaskStatus\nfrom dvc.repo.experiments.executor.local import TempDirExecutor\nfrom dvc.repo.experiments.utils import EXEC_PID_DIR, EXEC_TMP_DIR\nfrom dvc.utils.objects import cached_property\n\nfrom .base import BaseStashQueue, QueueEntry, QueueGetResult\nfrom .utils import fetch_running_exp_from_temp_dir\nfrom .workspace import WorkspaceQueue\n\nif TYPE_CHECKING:\n    from dvc.repo.experiments import Experiments\n    from dvc.repo.experiments.executor.base import BaseExecutor, ExecutorResult\n    from dvc.repo.experiments.serialize import ExpRange\n    from dvc_task.proc.manager import ProcessManager\n\nlogger = logger.getChild(__name__)\n\n\n_STANDALONE_TMP_DIR = os.path.join(EXEC_TMP_DIR, \"standalone\")\n\n\nclass TempDirQueue(WorkspaceQueue):\n    \"\"\"Standalone/tempdir exp queue implementation.\"\"\"\n\n    _EXEC_NAME: Optional[str] = None\n\n    @cached_property\n    def _standalone_tmp_dir(self) -> str:\n        assert self.repo.tmp_dir is not None\n        return os.path.join(self.repo.tmp_dir, _STANDALONE_TMP_DIR)\n\n    @cached_property\n    def pid_dir(self) -> str:\n        return os.path.join(self._standalone_tmp_dir, EXEC_PID_DIR)\n\n    @cached_property\n    def proc(self) -> \"ProcessManager\":\n        from dvc_task.proc.manager import ProcessManager\n\n        return ProcessManager(self.pid_dir)\n\n    def get(self) -> QueueGetResult:\n        revs = self.stash.stash_revs\n        if not revs:\n            raise ExpQueueEmptyError(\"No stashed standalone experiments.\")\n        stash_rev, stash_entry = first(revs.items())\n        entry = QueueEntry(\n            
self.repo.root_dir,\n            self.scm.root_dir,\n            self.ref,\n            stash_rev,\n            stash_entry.baseline_rev,\n            stash_entry.branch,\n            stash_entry.name,\n            stash_entry.head_rev,\n        )\n        executor = self.init_executor(\n            self.repo.experiments,\n            entry,\n            TempDirExecutor,\n            wdir=self._standalone_tmp_dir,\n        )\n        return QueueGetResult(entry, executor)\n\n    def iter_active(self) -> Generator[QueueEntry, None, None]:\n        # NOTE: Yielded queue entries are not complete for performance reasons.\n        # Retrieving exec ref information is unavailable without doing a\n        # git-fetch, and is unneeded in the common use cases for iter_active.\n        for stash_rev in self.proc:\n            infofile = self.get_infofile_path(stash_rev)\n            executor_info = ExecutorInfo.load_json(infofile)\n            if executor_info.status <= TaskStatus.SUCCESS and os.path.exists(\n                executor_info.root_dir\n            ):\n                yield QueueEntry(\n                    self.repo.root_dir,\n                    self.scm.root_dir,\n                    self.ref,\n                    stash_rev,\n                    executor_info.baseline_rev,\n                    None,  # branch unavailable without doing a git-fetch\n                    executor_info.name,\n                    None,\n                )\n\n    def _reproduce_entry(\n        self,\n        entry: QueueEntry,\n        executor: \"BaseExecutor\",\n        copy_paths: Optional[list[str]] = None,\n        message: Optional[str] = None,\n        **kwargs,\n    ) -> dict[str, dict[str, str]]:\n        results: dict[str, dict[str, str]] = defaultdict(dict)\n        exec_name = self._EXEC_NAME or entry.stash_rev\n        infofile = self.get_infofile_path(exec_name)\n        try:\n            rev = entry.stash_rev\n            exec_result = executor.reproduce(\n               
 info=executor.info,\n                rev=rev,\n                infofile=infofile,\n                log_level=logger.getEffectiveLevel(),\n                log_errors=True,\n                copy_paths=copy_paths,\n                message=message,\n            )\n            if not exec_result.exp_hash:\n                raise DvcException(  # noqa: TRY301\n                    f\"Failed to reproduce experiment '{rev[:7]}'\"\n                )\n            if exec_result.ref_info:\n                results[rev].update(\n                    self.collect_executor(self.repo.experiments, executor, exec_result)\n                )\n        except DvcException:\n            raise\n        except Exception as exc:\n            raise DvcException(f\"Failed to reproduce experiment '{rev[:7]}'\") from exc\n        finally:\n            executor.cleanup(infofile)\n        return results\n\n    @staticmethod\n    def collect_executor(\n        exp: \"Experiments\",\n        executor: \"BaseExecutor\",\n        exec_result: \"ExecutorResult\",\n    ) -> dict[str, str]:\n        return BaseStashQueue.collect_executor(exp, executor, exec_result)\n\n    def collect_active_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        fetch_refs: bool = False,\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        from dvc.repo import Repo\n        from dvc.repo.experiments.collect import collect_exec_branch\n        from dvc.repo.experiments.serialize import (\n            ExpExecutor,\n            ExpRange,\n            LocalExpExecutor,\n        )\n\n        result: dict[str, list[ExpRange]] = defaultdict(list)\n        for entry in self.iter_active():\n            if baseline_revs and entry.baseline_rev not in baseline_revs:\n                continue\n            if fetch_refs:\n                fetch_running_exp_from_temp_dir(self, entry.stash_rev, fetch_refs)\n            proc_info = self.proc.get(entry.stash_rev)\n            infofile = 
self.get_infofile_path(entry.stash_rev)\n            executor_info = ExecutorInfo.load_json(infofile)\n            if proc_info:\n                local_exec: Optional[LocalExpExecutor] = LocalExpExecutor(\n                    root=executor_info.root_dir,\n                    log=proc_info.stdout,\n                    pid=proc_info.pid,\n                )\n            else:\n                local_exec = None\n            dvc_root = os.path.join(executor_info.root_dir, executor_info.dvc_dir)\n            with Repo(dvc_root) as repo:\n                exps = list(\n                    collect_exec_branch(repo, executor_info.baseline_rev, **kwargs)\n                )\n            exps[0].rev = entry.stash_rev\n            exps[0].name = entry.name\n            result[entry.baseline_rev].append(\n                ExpRange(\n                    exps,\n                    executor=ExpExecutor(\n                        \"running\",\n                        name=executor_info.location,\n                        local=local_exec,\n                    ),\n                    name=entry.name,\n                )\n            )\n        return result\n"
  },
  {
    "path": "dvc/repo/experiments/queue/utils.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom scmrepo.exceptions import SCMError\n\nfrom dvc.log import logger\nfrom dvc.repo.experiments.executor.base import ExecutorInfo, TaskStatus\nfrom dvc.repo.experiments.refs import EXEC_NAMESPACE, EXPS_NAMESPACE, EXPS_STASH\nfrom dvc.repo.experiments.utils import get_exp_rwlock, iter_remote_refs\n\nlogger = logger.getChild(__name__)\n\n\nif TYPE_CHECKING:\n    from dvc.scm import Git\n\n    from .base import BaseStashQueue\n\n\ndef get_remote_executor_refs(scm: \"Git\", remote_url: str) -> list[str]:\n    \"\"\"Get result list refs from a remote repository\n\n    Args:\n        remote_url : remote executor's url\n    \"\"\"\n    refs = []\n    for ref in iter_remote_refs(scm, remote_url, base=EXPS_NAMESPACE):\n        if not ref.startswith(EXEC_NAMESPACE) and ref != EXPS_STASH:\n            refs.append(ref)  # noqa: PERF401\n    return refs\n\n\ndef fetch_running_exp_from_temp_dir(\n    queue: \"BaseStashQueue\", rev: str, fetch_refs: bool\n) -> dict[str, dict]:\n    \"\"\"Fetch status of running exps out of current working directory\n\n    Args:\n        queue (BaseStashQueue):\n        rev (str): stash revision of the experiment\n        fetch_refs (bool): fetch running checkpoint results to local or not.\n\n    Returns:\n        Dict[str, Dict]: executor info dicts keyed by revision\n    \"\"\"\n    from dvc.repo.experiments.executor.local import TempDirExecutor\n    from dvc.scm import InvalidRemoteSCMRepo\n    from dvc.utils.serialize import load_json\n\n    result: dict[str, dict] = {}\n    infofile = queue.get_infofile_path(rev)\n    try:\n        info = ExecutorInfo.from_dict(load_json(infofile))\n    except OSError:\n        return result\n    if info.status <= TaskStatus.RUNNING:\n        result[rev] = info.asdict()\n        if info.git_url and fetch_refs and info.status > TaskStatus.PREPARING:\n\n            def on_diverged(_ref: str):\n                return True\n\n            executor = TempDirExecutor.from_info(info)\n            try:\n                refs = get_remote_executor_refs(queue.scm, executor.git_url)\n                with get_exp_rwlock(queue.repo, writes=refs):\n                    for ref in executor.fetch_exps(\n                        queue.scm,\n                        refs,\n                        on_diverged=on_diverged,\n                    ):\n                        logger.debug(\"Updated running experiment '%s'.\", ref)\n                        last_rev = queue.scm.get_ref(ref)\n                        result[rev][\"last\"] = last_rev\n                        if last_rev:\n                            result[last_rev] = info.asdict()\n            except (InvalidRemoteSCMRepo, SCMError):\n                # ignore stale info files\n                del result[rev]\n    return result\n"
  },
  {
    "path": "dvc/repo/experiments/queue/workspace.py",
    "content": "import json\nimport os\nfrom collections import defaultdict\nfrom collections.abc import Collection, Generator\nfrom typing import TYPE_CHECKING, Optional\n\nimport psutil\nfrom funcy import first\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.repo.experiments.exceptions import ExpQueueEmptyError\nfrom dvc.repo.experiments.executor.base import ExecutorInfo, TaskStatus\nfrom dvc.repo.experiments.executor.local import WorkspaceExecutor\nfrom dvc.repo.experiments.refs import EXEC_BRANCH, WORKSPACE_STASH\nfrom dvc.repo.experiments.utils import get_exp_rwlock\nfrom dvc.utils.fs import remove\nfrom dvc.utils.serialize import load_json\n\nfrom .base import BaseStashQueue, QueueEntry, QueueGetResult\n\nif TYPE_CHECKING:\n    from dvc.repo.experiments import Experiments\n    from dvc.repo.experiments.executor.base import BaseExecutor, ExecutorResult\n    from dvc.repo.experiments.serialize import ExpRange\n\n    from .base import QueueDoneResult\n\nlogger = logger.getChild(__name__)\n\n\nclass WorkspaceQueue(BaseStashQueue):\n    _EXEC_NAME: Optional[str] = \"workspace\"\n\n    def put(self, *args, **kwargs) -> QueueEntry:\n        kwargs.pop(\"copy_paths\", None)\n        with get_exp_rwlock(self.repo, writes=[\"workspace\", WORKSPACE_STASH]):\n            return self._stash_exp(*args, **kwargs)\n\n    def get(self) -> QueueGetResult:\n        revs = self.stash.stash_revs\n        if not revs:\n            raise ExpQueueEmptyError(\"No experiments in the queue.\")\n        stash_rev, stash_entry = first(revs.items())\n        entry = QueueEntry(\n            self.repo.root_dir,\n            self.scm.root_dir,\n            self.ref,\n            stash_rev,\n            stash_entry.baseline_rev,\n            stash_entry.branch,\n            stash_entry.name,\n            stash_entry.head_rev,\n        )\n        executor = self.init_executor(self.repo.experiments, entry)\n        return QueueGetResult(entry, executor)\n\n    
def iter_queued(self) -> Generator[QueueEntry, None, None]:\n        for rev, entry in self.stash.stash_revs.items():\n            yield QueueEntry(\n                self.repo.root_dir,\n                self.scm.root_dir,\n                self.ref,\n                rev,\n                entry.baseline_rev,\n                entry.branch,\n                entry.name,\n                entry.head_rev,\n            )\n\n    def iter_active(self) -> Generator[QueueEntry, None, None]:\n        # Workspace run state is reflected in the workspace itself and does not\n        # need to be handled via the queue\n        raise NotImplementedError\n\n    def iter_done(self) -> Generator[\"QueueDoneResult\", None, None]:\n        raise NotImplementedError\n\n    def iter_failed(self) -> Generator[\"QueueDoneResult\", None, None]:\n        raise NotImplementedError\n\n    def iter_success(self) -> Generator[\"QueueDoneResult\", None, None]:\n        raise NotImplementedError\n\n    def reproduce(\n        self, copy_paths: Optional[list[str]] = None, message: Optional[str] = None\n    ) -> dict[str, dict[str, str]]:\n        results: dict[str, dict[str, str]] = defaultdict(dict)\n        try:\n            while True:\n                entry, executor = self.get()\n                results.update(\n                    self._reproduce_entry(\n                        entry, executor, copy_paths=copy_paths, message=message\n                    )\n                )\n        except ExpQueueEmptyError:\n            pass\n        return results\n\n    def _reproduce_entry(\n        self, entry: QueueEntry, executor: \"BaseExecutor\", **kwargs\n    ) -> dict[str, dict[str, str]]:\n        kwargs.pop(\"copy_paths\", None)\n        from dvc_task.proc.process import ProcessInfo\n\n        results: dict[str, dict[str, str]] = defaultdict(dict)\n        exec_name = self._EXEC_NAME or entry.stash_rev\n        proc_info = ProcessInfo(os.getpid(), None, None, None, None)\n        proc_info_path = 
self._proc_info_path(exec_name)\n        os.makedirs(os.path.dirname(proc_info_path), exist_ok=True)\n        proc_info.dump(proc_info_path)\n        infofile = self.get_infofile_path(exec_name)\n        try:\n            rev = entry.stash_rev\n            exec_result = executor.reproduce(\n                info=executor.info,\n                rev=rev,\n                infofile=infofile,\n                log_level=logger.getEffectiveLevel(),\n                log_errors=not isinstance(executor, WorkspaceExecutor),\n                message=kwargs.get(\"message\"),\n            )\n            if not exec_result.exp_hash:\n                raise DvcException(  # noqa: TRY301\n                    f\"Failed to reproduce experiment '{rev[:7]}'\"\n                )\n            if exec_result.ref_info:\n                results[rev].update(\n                    self.collect_executor(self.repo.experiments, executor, exec_result)\n                )\n        except DvcException:\n            raise\n        except Exception as exc:\n            raise DvcException(f\"Failed to reproduce experiment '{rev[:7]}'\") from exc\n        finally:\n            executor.cleanup(infofile)\n            remove(self._proc_info_path(exec_name))\n        return results\n\n    def _proc_info_path(self, name: str) -> str:\n        return os.path.join(self.pid_dir, name, f\"{name}.json\")\n\n    @property\n    def _active_pid(self) -> Optional[int]:\n        from dvc_task.proc.process import ProcessInfo\n\n        assert self._EXEC_NAME\n        name = self._EXEC_NAME\n        try:\n            proc_info = ProcessInfo.load(self._proc_info_path(name))\n            pid = proc_info.pid\n            if psutil.pid_exists(pid):\n                return pid\n            logger.debug(\"Workspace exec PID '%d' no longer exists, removing.\", pid)\n            remove(self._proc_info_path(name))\n        except (FileNotFoundError, json.JSONDecodeError):\n            pass\n        return None\n\n    
@staticmethod\n    def collect_executor(\n        exp: \"Experiments\",\n        executor: \"BaseExecutor\",  # noqa: ARG004\n        exec_result: \"ExecutorResult\",\n    ) -> dict[str, str]:\n        results: dict[str, str] = {}\n        exp_rev = exp.scm.get_ref(EXEC_BRANCH)\n        if exp_rev:\n            assert exec_result.exp_hash\n            logger.debug(\"Collected experiment '%s'.\", exp_rev[:7])\n            results[exp_rev] = exec_result.exp_hash\n\n        return results\n\n    def get_result(self, entry: QueueEntry) -> Optional[\"ExecutorResult\"]:\n        raise NotImplementedError\n\n    def kill(self, revs: Collection[str]) -> None:\n        raise NotImplementedError\n\n    def shutdown(self, kill: bool = False):\n        raise NotImplementedError\n\n    def logs(self, rev: str, encoding: Optional[str] = None, follow: bool = False):\n        raise NotImplementedError\n\n    def get_running_exp(self) -> Optional[str]:\n        \"\"\"Return the name of the exp running in workspace (if it exists).\"\"\"\n        assert self._EXEC_NAME\n        if self._active_pid is None:\n            return None\n\n        infofile = self.get_infofile_path(self._EXEC_NAME)\n        try:\n            info = ExecutorInfo.from_dict(load_json(infofile))\n        except OSError:\n            return None\n        return info.name\n\n    def collect_active_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        fetch_refs: bool = False,  # noqa: ARG002\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        from dvc.repo.experiments.collect import collect_exec_branch\n        from dvc.repo.experiments.serialize import (\n            ExpExecutor,\n            ExpRange,\n            LocalExpExecutor,\n        )\n\n        result: dict[str, list[ExpRange]] = defaultdict(list)\n        pid = self._active_pid\n        if pid is None:\n            return result\n\n        assert self._EXEC_NAME\n        infofile = 
self.get_infofile_path(self._EXEC_NAME)\n        try:\n            info = ExecutorInfo.from_dict(load_json(infofile))\n        except OSError:\n            return result\n\n        if (\n            (not baseline_revs or info.baseline_rev in baseline_revs)\n            and info.status < TaskStatus.FAILED\n            and info.status != TaskStatus.SUCCESS\n        ):\n            local_exec = LocalExpExecutor(root=info.root_dir, pid=pid)\n            exps = list(collect_exec_branch(self.repo, info.baseline_rev, **kwargs))\n            exps[0].name = info.name\n            result[info.baseline_rev] = [\n                ExpRange(\n                    exps,\n                    executor=ExpExecutor(\"running\", name=\"workspace\", local=local_exec),\n                    name=info.name,\n                )\n            ]\n        return result\n\n    def collect_queued_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        raise NotImplementedError\n\n    def collect_failed_data(\n        self,\n        baseline_revs: Optional[Collection[str]],\n        **kwargs,\n    ) -> dict[str, list[\"ExpRange\"]]:\n        raise NotImplementedError\n"
  },
  {
    "path": "dvc/repo/experiments/refs.py",
    "content": "from typing import Optional\n\nfrom .exceptions import InvalidExpRefError\n\n# Experiment refs are stored according to baseline git SHA:\n#   refs/exps/01/234abcd.../<exp_name>\nEXPS_NAMESPACE = \"refs/exps\"\nEXPS_STASH = f\"{EXPS_NAMESPACE}/stash\"\nWORKSPACE_STASH = EXPS_STASH\nAPPLY_NAMESPACE = f\"{EXPS_NAMESPACE}/apply\"\nAPPLY_HEAD = f\"{APPLY_NAMESPACE}/ORIG_HEAD\"\nAPPLY_STASH = f\"{APPLY_NAMESPACE}/stash\"\nCELERY_NAMESPACE = f\"{EXPS_NAMESPACE}/celery\"\nCELERY_STASH = f\"{CELERY_NAMESPACE}/stash\"\nCELERY_FAILED_STASH = f\"{CELERY_NAMESPACE}/failed\"\nEXEC_NAMESPACE = f\"{EXPS_NAMESPACE}/exec\"\nEXEC_APPLY = f\"{EXEC_NAMESPACE}/EXEC_APPLY\"\nEXEC_BRANCH = f\"{EXEC_NAMESPACE}/EXEC_BRANCH\"\nEXEC_BASELINE = f\"{EXEC_NAMESPACE}/EXEC_BASELINE\"\nEXEC_HEAD = f\"{EXEC_NAMESPACE}/EXEC_HEAD\"\nEXEC_MERGE = f\"{EXEC_NAMESPACE}/EXEC_MERGE\"\nTEMP_NAMESPACE = f\"{EXPS_NAMESPACE}/temp\"\nSTASHES = {WORKSPACE_STASH, CELERY_STASH}\nITER_SKIP_NAMESPACES = {\n    APPLY_NAMESPACE,\n    CELERY_NAMESPACE,\n    EXEC_NAMESPACE,\n    TEMP_NAMESPACE,\n}\n\n\nclass ExpRefInfo:\n    namespace = EXPS_NAMESPACE\n\n    def __init__(self, baseline_sha: str, name: Optional[str] = None):\n        self.baseline_sha = baseline_sha\n        self.name: str = name if name else \"\"\n\n    def __str__(self):\n        return \"/\".join(self.parts)\n\n    def __repr__(self):\n        baseline = f\"'{self.baseline_sha}'\"\n        name = f\"'{self.name}'\" if self.name else \"None\"\n        return f\"ExpRefInfo(baseline_sha={baseline}, name={name})\"\n\n    @property\n    def parts(self):\n        return (\n            (self.namespace,)\n            + ((self.baseline_sha[:2], self.baseline_sha[2:]))\n            + ((self.name,) if self.name else ())\n        )\n\n    @classmethod\n    def from_ref(cls, ref: str):\n        try:\n            parts = ref.split(\"/\")\n            if (\n                len(parts) < 4\n                or len(parts) > 5\n                or \"/\".join(parts[:2]) != EXPS_NAMESPACE\n            ):\n                raise InvalidExpRefError(ref)\n        except ValueError:\n            raise InvalidExpRefError(ref)  # noqa: B904\n        baseline_sha = parts[2] + parts[3]\n        name = parts[4] if len(parts) == 5 else None\n        return cls(baseline_sha, name)\n\n    def __eq__(self, other):\n        if not isinstance(other, ExpRefInfo):\n            return False\n\n        return self.baseline_sha == other.baseline_sha and self.name == other.name\n\n    def __hash__(self):\n        return hash((self.baseline_sha, self.name))\n"
  },
  {
    "path": "dvc/repo/experiments/remove.py",
    "content": "from collections.abc import Iterable\nfrom typing import TYPE_CHECKING, Optional, Union\n\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.scm import Git, iter_revs\n\nfrom .exceptions import InvalidArgumentError, UnresolvedExpNamesError\nfrom .utils import exp_refs, exp_refs_by_baseline, push_refspec\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.repo.experiments.queue.celery import LocalCeleryQueue\n\n    from .queue.base import ExpRefAndQueueEntry, QueueEntry\n    from .refs import ExpRefInfo\n\n\nlogger = logger.getChild(__name__)\n\n\n@locked\n@scm_context\ndef remove(  # noqa: C901, PLR0912\n    repo: \"Repo\",\n    exp_names: Union[str, list[str], None] = None,\n    rev: Optional[Union[list[str], str]] = None,\n    all_commits: bool = False,\n    num: int = 1,\n    queue: bool = False,\n    git_remote: Optional[str] = None,\n    keep: bool = False,\n) -> list[str]:\n    removed: list[str] = []\n\n    if all([keep, queue]):\n        raise InvalidArgumentError(\"Cannot use both `--keep` and `--queue`.\")\n\n    if not any([exp_names, queue, all_commits, rev]):\n        return removed\n\n    celery_queue: LocalCeleryQueue = repo.experiments.celery_queue\n\n    if queue:\n        removed.extend(celery_queue.clear(queued=True))\n\n    assert isinstance(repo.scm, Git)\n\n    exp_ref_list: list[ExpRefInfo] = []\n    queue_entry_list: list[QueueEntry] = []\n\n    if exp_names:\n        results: dict[str, ExpRefAndQueueEntry] = (\n            celery_queue.get_ref_and_entry_by_names(exp_names, git_remote)\n        )\n        remained: list[str] = []\n        for name, result in results.items():\n            if not result.exp_ref_info and not result.queue_entry:\n                remained.append(name)\n                continue\n            removed.append(name)\n            if result.exp_ref_info:\n                exp_ref_list.append(result.exp_ref_info)\n            if 
result.queue_entry:\n                queue_entry_list.append(result.queue_entry)\n\n        if remained:\n            raise UnresolvedExpNamesError(remained, git_remote=git_remote)\n    elif rev:\n        if isinstance(rev, str):\n            rev = [rev]\n        exp_ref_dict = _resolve_exp_by_baseline(repo, rev, num, git_remote)\n        removed.extend(exp_ref_dict.keys())\n        exp_ref_list.extend(exp_ref_dict.values())\n    elif all_commits:\n        exp_ref_list.extend(exp_refs(repo.scm, git_remote))\n        removed.extend([ref.name for ref in exp_ref_list])\n\n    if keep:\n        exp_ref_list = list(set(exp_refs(repo.scm, git_remote)) - set(exp_ref_list))\n        removed = [ref.name for ref in exp_ref_list]\n\n    if exp_ref_list:\n        _remove_commited_exps(repo.scm, exp_ref_list, git_remote)\n\n    if queue_entry_list:\n        from .queue.remove import remove_tasks\n\n        remove_tasks(celery_queue, queue_entry_list)\n\n    if git_remote:\n        from .push import notify_refs_to_studio\n\n        removed_refs = [str(r) for r in exp_ref_list]\n        notify_refs_to_studio(repo, git_remote, removed=removed_refs)\n\n    return removed\n\n\ndef _resolve_exp_by_baseline(\n    repo: \"Repo\",\n    rev: list[str],\n    num: int,\n    git_remote: Optional[str] = None,\n) -> dict[str, \"ExpRefInfo\"]:\n    assert isinstance(repo.scm, Git)\n\n    commit_ref_dict: dict[str, ExpRefInfo] = {}\n    rev_dict = iter_revs(repo.scm, rev, num)\n    rev_set = set(rev_dict.keys())\n    ref_info_dict = exp_refs_by_baseline(repo.scm, rev_set, git_remote)\n    for ref_info_list in ref_info_dict.values():\n        for ref_info in ref_info_list:\n            commit_ref_dict[ref_info.name] = ref_info\n    return commit_ref_dict\n\n\ndef _remove_commited_exps(\n    scm: \"Git\", exp_refs_list: Iterable[\"ExpRefInfo\"], remote: Optional[str]\n) -> list[str]:\n    if remote:\n        from dvc.scm import TqdmGit\n\n        for ref_info in exp_refs_list:\n            with 
TqdmGit(desc=\"Pushing git refs\") as pbar:\n                push_refspec(\n                    scm,\n                    remote,\n                    [(None, str(ref_info))],\n                    progress=pbar.update_git,\n                )\n    else:\n        from .utils import remove_exp_refs\n\n        remove_exp_refs(scm, exp_refs_list)\n    return [exp_ref.name for exp_ref in exp_refs_list]\n"
  },
  {
    "path": "dvc/repo/experiments/rename.py",
    "content": "from typing import TYPE_CHECKING, Optional, Union\n\nfrom dvc.log import logger\nfrom dvc.repo.experiments.exceptions import (\n    ExperimentExistsError,\n    UnresolvedExpNamesError,\n)\nfrom dvc.repo.experiments.utils import check_ref_format, resolve_name\nfrom dvc.scm import Git\n\nfrom .refs import ExpRefInfo\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\nlogger = logger.getChild(__name__)\n\n\ndef rename(\n    repo: \"Repo\",\n    new_name: str,\n    exp_name: Union[str, None] = None,\n    git_remote: Optional[str] = None,\n    force: bool = False,\n) -> Union[list[str], None]:\n    renamed: list[str] = []\n    remained: list[str] = []\n    assert isinstance(repo.scm, Git)\n\n    if exp_name == new_name:\n        return None\n\n    if exp_name:\n        results: dict[str, Union[ExpRefInfo, None]] = resolve_name(\n            scm=repo.scm, exp_names=exp_name, git_remote=git_remote\n        )\n        for name, result in results.items():\n            if result is None:\n                remained.append(name)\n                continue\n\n            new_ref = ExpRefInfo(baseline_sha=result.baseline_sha, name=new_name)\n            if repo.scm.get_ref(str(new_ref)) and not force:\n                raise ExperimentExistsError(new_name)\n\n            check_ref_format(repo.scm, new_ref)\n            _rename_exp(scm=repo.scm, ref_info=result, new_name=new_name)\n            renamed.append(name)\n\n    if remained:\n        raise UnresolvedExpNamesError(remained, git_remote=git_remote)\n\n    return renamed\n\n\ndef _rename_exp(scm: \"Git\", ref_info: \"ExpRefInfo\", new_name: str):\n    rev = scm.get_ref(str(ref_info))\n    scm.remove_ref(str(ref_info))\n    ref_info.name = new_name\n    scm.set_ref(str(ref_info), rev)\n    return new_name\n"
  },
  {
    "path": "dvc/repo/experiments/run.py",
    "content": "from collections.abc import Iterable\nfrom typing import Optional\n\nfrom dvc.dependency.param import ParamsDependency\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.ui import ui\nfrom dvc.utils.cli_parse import to_path_overrides\n\nlogger = logger.getChild(__name__)\n\n\n@locked\ndef run(  # noqa: C901, PLR0912\n    repo,\n    targets: Optional[Iterable[str]] = None,\n    params: Optional[Iterable[str]] = None,\n    run_all: bool = False,\n    jobs: int = 1,\n    tmp_dir: bool = False,\n    queue: bool = False,\n    copy_paths: Optional[Iterable[str]] = None,\n    message: Optional[str] = None,\n    no_hydra: bool = False,\n    **kwargs,\n) -> dict[str, str]:\n    \"\"\"Reproduce the specified targets as an experiment.\n\n    Accepts the same additional kwargs as Repo.reproduce.\n\n    Returns a dict mapping new experiment SHAs to the results\n    of `repro` for that experiment.\n    \"\"\"\n    if kwargs.get(\"dry\"):\n        tmp_dir = True\n\n    if run_all:\n        return repo.experiments.reproduce_celery(jobs=jobs)\n\n    hydra_sweep = None\n    if params:\n        from dvc.utils.hydra import to_hydra_overrides\n\n        path_overrides = to_path_overrides(params)\n\n        if tmp_dir or queue:\n            untracked = repo.scm.untracked_files()\n            for path in path_overrides:\n                if path in untracked:\n                    logger.debug(\n                        \"'%s' is currently untracked but will be modified by DVC. 
\"\n                        \"Adding it to git.\",\n                        path,\n                    )\n                    repo.scm.add([path])\n\n        hydra_sweep = any(\n            x.is_sweep_override()\n            for param_file in path_overrides\n            for x in to_hydra_overrides(path_overrides[param_file])\n        )\n\n        if hydra_sweep and not queue:\n            raise InvalidArgumentError(\n                \"Sweep overrides can't be used without `--queue`\"\n            )\n    else:\n        path_overrides = {}\n\n    hydra_enabled = repo.config.get(\"hydra\", {}).get(\"enabled\", False) and not no_hydra\n    hydra_output_file = ParamsDependency.DEFAULT_PARAMS_FILE\n    if hydra_enabled and hydra_output_file not in path_overrides:\n        # Force `_update_params` even if `--set-param` was not used\n        path_overrides[hydra_output_file] = []\n\n    if not queue:\n        return repo.experiments.reproduce_one(\n            targets=targets,\n            params=path_overrides,\n            tmp_dir=tmp_dir,\n            copy_paths=copy_paths,\n            message=message,\n            no_hydra=no_hydra,\n            **kwargs,\n        )\n\n    if hydra_sweep:\n        from dvc.utils.hydra import get_hydra_sweeps\n\n        sweeps = get_hydra_sweeps(path_overrides)\n        name_prefix = kwargs.get(\"name\")\n    else:\n        sweeps = [path_overrides]\n\n    for idx, sweep_overrides in enumerate(sweeps):\n        if hydra_sweep and name_prefix is not None:\n            kwargs[\"name\"] = f\"{name_prefix}-{idx + 1}\"\n        queue_entry = repo.experiments.queue_one(\n            repo.experiments.celery_queue,\n            targets=targets,\n            params=sweep_overrides,\n            copy_paths=copy_paths,\n            message=message,\n            no_hydra=no_hydra,\n            **kwargs,\n        )\n        if sweep_overrides:\n            ui.write(f\"Queueing with overrides '{sweep_overrides}'.\")\n        name = queue_entry.name 
or queue_entry.stash_rev[:7]\n        ui.write(f\"Queued experiment '{name}' for future execution.\")\n\n    return {}\n"
  },
  {
    "path": "dvc/repo/experiments/save.py",
    "content": "import os\nfrom collections.abc import Iterable\nfrom typing import TYPE_CHECKING, Optional\n\nfrom funcy import first\n\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\n\nlogger = logger.getChild(__name__)\n\n\ndef save(\n    repo: \"Repo\",\n    targets: Optional[Iterable[str]] = None,\n    name: Optional[str] = None,\n    recursive: bool = False,\n    force: bool = False,\n    include_untracked: Optional[list[str]] = None,\n    message: Optional[str] = None,\n) -> Optional[str]:\n    \"\"\"Save the current workspace status as an experiment.\n\n    Returns the saved experiment's SHA.\n    \"\"\"\n    logger.debug(\"Saving workspace in %s\", os.getcwd())\n\n    queue = repo.experiments.workspace_queue\n    entry = repo.experiments.new(queue=queue, name=name, force=force)\n    executor = queue.init_executor(repo.experiments, entry)\n\n    try:\n        save_result = executor.save(\n            executor.info,\n            targets=targets,\n            recursive=recursive,\n            force=force,\n            include_untracked=include_untracked,\n            message=message,\n        )\n        result = queue.collect_executor(repo.experiments, executor, save_result)\n    finally:\n        executor.cleanup()\n\n    return first(result)\n"
  },
  {
    "path": "dvc/repo/experiments/serialize.py",
    "content": "import json\nfrom collections.abc import Iterator\nfrom dataclasses import asdict, dataclass, field\nfrom datetime import datetime\nfrom typing import TYPE_CHECKING, Any, Literal, Optional\n\nfrom dvc.exceptions import DvcException\nfrom dvc.repo.metrics.show import _gather_metrics\nfrom dvc.repo.params.show import _gather_params\nfrom dvc.utils import relpath\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.repo.metrics.show import FileResult\n\n\nclass DeserializeError(DvcException):\n    pass\n\n\nclass _ISOEncoder(json.JSONEncoder):\n    def default(self, o: object) -> Any:\n        if isinstance(o, datetime):\n            return o.isoformat()\n        return super().default(o)\n\n\n@dataclass(frozen=True)\nclass SerializableExp:\n    \"\"\"Serializable experiment data.\"\"\"\n\n    rev: str\n    timestamp: Optional[datetime] = None\n    params: dict[str, \"FileResult\"] = field(default_factory=dict)\n    metrics: dict[str, \"FileResult\"] = field(default_factory=dict)\n    deps: dict[str, \"ExpDep\"] = field(default_factory=dict)\n    outs: dict[str, \"ExpOut\"] = field(default_factory=dict)\n    meta: dict[str, Any] = field(default_factory=dict)\n\n    @classmethod\n    def from_repo(\n        cls,\n        repo: \"Repo\",\n        rev: Optional[str] = None,\n        param_deps: bool = False,\n        **kwargs,\n    ) -> \"SerializableExp\":\n        \"\"\"Returns a SerializableExp from the current repo state.\n\n        Params, metrics, deps, outs are filled via repo fs/index, all other fields\n        should be passed via kwargs.\n        \"\"\"\n        from dvc.dependency import (\n            DatasetDependency,\n            DbDependency,\n            ParamsDependency,\n            RepoDependency,\n        )\n\n        rev = rev or repo.get_rev()\n        assert rev\n\n        params = _gather_params(repo, deps_only=param_deps, on_error=\"return\")\n        metrics = _gather_metrics(repo, on_error=\"return\")\n        
return cls(\n            rev=rev,\n            params=params,\n            metrics=metrics,\n            deps={\n                relpath(dep.fs_path, repo.root_dir): ExpDep(\n                    hash=dep.hash_info.value if dep.hash_info else None,\n                    size=dep.meta.size if dep.meta else None,\n                    nfiles=dep.meta.nfiles if dep.meta else None,\n                )\n                for dep in repo.index.deps\n                if not isinstance(\n                    dep,\n                    (ParamsDependency, RepoDependency, DatasetDependency, DbDependency),\n                )\n            },\n            outs={\n                relpath(out.fs_path, repo.root_dir): ExpOut(\n                    hash=out.hash_info.value if out.hash_info else None,\n                    size=out.meta.size if out.meta else None,\n                    nfiles=out.meta.nfiles if out.meta else None,\n                    use_cache=out.use_cache,\n                    is_data_source=out.stage.is_data_source,\n                )\n                for out in repo.index.outs\n                if not (out.is_metric or out.is_plot)\n            },\n            **kwargs,\n        )\n\n    def dumpd(self) -> dict[str, Any]:\n        return asdict(self)\n\n    def as_bytes(self) -> bytes:\n        return _ISOEncoder().encode(self.dumpd()).encode(\"utf-8\")\n\n    @classmethod\n    def from_bytes(cls, data: bytes):\n        try:\n            parsed = json.loads(data)\n            if \"timestamp\" in parsed:\n                parsed[\"timestamp\"] = datetime.fromisoformat(parsed[\"timestamp\"])\n            if \"deps\" in parsed:\n                parsed[\"deps\"] = {k: ExpDep(**v) for k, v in parsed[\"deps\"].items()}\n            if \"outs\" in parsed:\n                parsed[\"outs\"] = {k: ExpOut(**v) for k, v in parsed[\"outs\"].items()}\n            return cls(**parsed)\n        except (TypeError, json.JSONDecodeError) as exc:\n            raise DeserializeError(\"failed to 
load SerializableExp\") from exc\n\n    @property\n    def contains_error(self) -> bool:\n        return any(value.get(\"error\") for value in self.params.values()) or any(\n            value.get(\"error\") for value in self.metrics.values()\n        )\n\n\n@dataclass(frozen=True)\nclass ExpDep:\n    hash: Optional[str]\n    size: Optional[int]\n    nfiles: Optional[int]\n\n\n@dataclass(frozen=True)\nclass ExpOut:\n    hash: Optional[str]\n    size: Optional[int]\n    nfiles: Optional[int]\n    use_cache: bool\n    is_data_source: bool\n\n\n@dataclass(frozen=True)\nclass SerializableError:\n    msg: str\n    type: str = \"\"\n\n    def dumpd(self) -> dict[str, Any]:\n        return asdict(self)\n\n    def as_bytes(self) -> bytes:\n        return json.dumps(self.dumpd()).encode(\"utf-8\")\n\n    @classmethod\n    def from_bytes(cls, data: bytes):\n        try:\n            parsed = json.loads(data)\n            return cls(**parsed)\n        except (TypeError, json.JSONDecodeError) as exc:\n            raise DeserializeError(\"failed to load SerializableError\") from exc\n\n\n@dataclass\nclass ExpState:\n    \"\"\"Git/DVC experiment state.\"\"\"\n\n    rev: str\n    name: Optional[str] = None\n    data: Optional[SerializableExp] = None\n    error: Optional[SerializableError] = None\n    experiments: Optional[list[\"ExpRange\"]] = None\n\n    def dumpd(self) -> dict[str, Any]:\n        return asdict(self)\n\n\n@dataclass\nclass ExpRange:\n    revs: list[\"ExpState\"]\n    executor: Optional[\"ExpExecutor\"] = None\n    name: Optional[str] = None\n\n    def __len__(self) -> int:\n        return len(self.revs)\n\n    def __iter__(self) -> Iterator[\"ExpState\"]:\n        return iter(self.revs)\n\n    def __getitem__(self, index: int) -> \"ExpState\":\n        return self.revs[index]\n\n    def dumpd(self) -> dict[str, Any]:\n        return asdict(self)\n\n\n@dataclass\nclass LocalExpExecutor:\n    root: Optional[str] = None\n    log: Optional[str] = None\n    pid: 
Optional[int] = None\n    returncode: Optional[int] = None\n    task_id: Optional[str] = None\n\n\n@dataclass\nclass ExpExecutor:\n    state: Literal[\"success\", \"queued\", \"running\", \"failed\"]\n    name: Optional[str] = None\n    local: Optional[LocalExpExecutor] = None\n"
  },
  {
    "path": "dvc/repo/experiments/show.py",
    "content": "from collections import Counter, defaultdict\nfrom collections.abc import Iterable, Iterator, Mapping\nfrom datetime import date, datetime\nfrom typing import TYPE_CHECKING, Any, Literal, NamedTuple, Optional, Union\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\nfrom dvc.scm import Git\nfrom dvc.ui import ui\nfrom dvc.utils.flatten import flatten\n\nfrom .collect import collect\n\nif TYPE_CHECKING:\n    from dvc.compare import TabularData\n    from dvc.repo import Repo\n    from dvc.ui.table import CellT\n\n    from .serialize import ExpRange, ExpState\n\nlogger = logger.getChild(__name__)\n\n\ndef show(\n    repo: \"Repo\",\n    revs: Union[list[str], str, None] = None,\n    all_branches: bool = False,\n    all_tags: bool = False,\n    all_commits: bool = False,\n    num: int = 1,\n    hide_queued: bool = False,\n    hide_failed: bool = False,\n    hide_workspace: bool = False,\n    sha_only: bool = False,\n    **kwargs,\n) -> list[\"ExpState\"]:\n    return collect(\n        repo,\n        revs=revs,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n        num=num,\n        hide_queued=hide_queued,\n        hide_failed=hide_failed,\n        hide_workspace=hide_workspace,\n        sha_only=sha_only,\n        **kwargs,\n    )\n\n\ndef tabulate(\n    baseline_states: Iterable[\"ExpState\"],\n    fill_value: Optional[str] = \"-\",\n    error_value: str = \"!\",\n    **kwargs,\n) -> tuple[\"TabularData\", dict[str, Iterable[str]]]:\n    \"\"\"Return table data for experiments.\n\n    Returns:\n        Tuple of (table_data, data_headers)\n    \"\"\"\n    from funcy import lconcat\n    from funcy.seqs import flatten as flatten_list\n\n    from dvc.compare import TabularData\n\n    data_names = _collect_names(baseline_states)\n    metrics_names = data_names.metrics\n    params_names = data_names.params\n    deps_names = data_names.sorted_deps\n\n    headers = [\n        
\"Experiment\",\n        \"rev\",\n        \"typ\",\n        \"Created\",\n        \"parent\",\n        \"State\",\n        \"Executor\",\n    ]\n    names = metrics_names | params_names\n    counter = Counter(flatten_list([list(a.keys()) for a in names.values()]))\n    counter.update(headers)\n    metrics_headers = _normalize_headers(metrics_names, counter)\n    params_headers = _normalize_headers(params_names, counter)\n\n    all_headers = lconcat(headers, metrics_headers, params_headers, deps_names)\n    td = TabularData(all_headers, fill_value=fill_value)\n    td.extend(\n        _build_rows(\n            baseline_states,\n            all_headers=all_headers,\n            metrics_headers=metrics_headers,\n            params_headers=params_headers,\n            metrics_names=metrics_names,\n            params_names=params_names,\n            deps_names=deps_names,\n            fill_value=fill_value,\n            error_value=error_value,\n            **kwargs,\n        )\n    )\n    data_headers: dict[str, Iterable[str]] = {\n        \"metrics\": metrics_headers,\n        \"params\": params_headers,\n        \"deps\": deps_names,\n    }\n    return td, data_headers\n\n\ndef _build_rows(\n    baseline_states: Iterable[\"ExpState\"],\n    *,\n    all_headers: Iterable[str],\n    fill_value: Optional[str],\n    sort_by: Optional[str] = None,\n    sort_order: Optional[Literal[\"asc\", \"desc\"]] = None,\n    **kwargs,\n) -> Iterator[tuple[\"CellT\", ...]]:\n    for baseline in baseline_states:\n        row: dict[str, CellT] = dict.fromkeys(all_headers, fill_value)\n        row[\"Experiment\"] = \"\"\n        if baseline.name:\n            row[\"rev\"] = baseline.name\n        elif Git.is_sha(baseline.rev):\n            row[\"rev\"] = baseline.rev[:7]\n        else:\n            row[\"rev\"] = baseline.rev\n        row[\"typ\"] = \"baseline\"\n        row[\"parent\"] = \"\"\n        if baseline.data:\n            row[\"Created\"] = format_time(\n                
baseline.data.timestamp, fill_value=fill_value, **kwargs\n            )\n            row.update(_data_cells(baseline, fill_value=fill_value, **kwargs))\n        yield tuple(row.values())\n        if baseline.experiments:\n            if sort_by:\n                metrics_names: Mapping[str, Iterable[str]] = kwargs.get(\n                    \"metrics_names\", {}\n                )\n                params_names: Mapping[str, Iterable[str]] = kwargs.get(\n                    \"params_names\", {}\n                )\n                sort_path, sort_name, sort_type = _sort_column(\n                    sort_by, metrics_names, params_names\n                )\n                reverse = sort_order == \"desc\"\n                experiments = _sort_exp(\n                    baseline.experiments, sort_path, sort_name, sort_type, reverse\n                )\n            else:\n                experiments = baseline.experiments\n            for i, child in enumerate(experiments):\n                yield from _exp_range_rows(\n                    child,\n                    all_headers=all_headers,\n                    fill_value=fill_value,\n                    is_base=i == len(baseline.experiments) - 1,\n                    **kwargs,\n                )\n\n\ndef _sort_column(  # noqa: C901\n    sort_by: str,\n    metric_names: Mapping[str, Iterable[str]],\n    param_names: Mapping[str, Iterable[str]],\n) -> tuple[str, str, str]:\n    sep = \":\"\n    parts = sort_by.split(sep)\n    matches: set[tuple[str, str, str]] = set()\n\n    for split_num in range(len(parts)):\n        path = sep.join(parts[:split_num])\n        sort_name = sep.join(parts[split_num:])\n        if not path:  # handles ':metric_name' case\n            sort_by = sort_name\n        if path in metric_names and sort_name in metric_names[path]:\n            matches.add((path, sort_name, \"metrics\"))\n        if path in param_names and sort_name in param_names[path]:\n            matches.add((path, sort_name, 
\"params\"))\n    if not matches:\n        for path in metric_names:\n            if sort_by in metric_names[path]:\n                matches.add((path, sort_by, \"metrics\"))\n        for path in param_names:\n            if sort_by in param_names[path]:\n                matches.add((path, sort_by, \"params\"))\n\n    if len(matches) == 1:\n        return matches.pop()\n    if len(matches) > 1:\n        raise InvalidArgumentError(\n            \"Ambiguous sort column '{}' matched '{}'\".format(\n                sort_by,\n                \", \".join([f\"{path}:{name}\" for path, name, _ in matches]),\n            )\n        )\n    raise InvalidArgumentError(f\"Unknown sort column '{sort_by}'\")\n\n\ndef _sort_exp(\n    experiments: Iterable[\"ExpRange\"],\n    sort_path: str,\n    sort_name: str,\n    typ: str,\n    reverse: bool,\n) -> list[\"ExpRange\"]:\n    from funcy import first\n\n    def _sort(exp_range: \"ExpRange\"):\n        exp = first(exp_range.revs)\n        if not exp:\n            return True\n        data = exp.data.dumpd().get(typ, {}).get(sort_path, {}).get(\"data\", {})\n        val = flatten(data).get(sort_name)\n        return val is None, val\n\n    return sorted(experiments, key=_sort, reverse=reverse)\n\n\ndef _exp_range_rows(\n    exp_range: \"ExpRange\",\n    *,\n    all_headers: Iterable[str],\n    fill_value: Optional[str],\n    is_base: bool = False,\n    **kwargs,\n) -> Iterator[tuple[\"CellT\", ...]]:\n    from funcy import first\n\n    if len(exp_range.revs) > 1:\n        logger.debug(\"Returning tip commit for legacy checkpoint exp\")\n    exp = first(exp_range.revs)\n    if exp:\n        row: dict[str, CellT] = dict.fromkeys(all_headers, fill_value)\n        row[\"Experiment\"] = exp.name or \"\"\n        row[\"rev\"] = exp.rev[:7] if Git.is_sha(exp.rev) else exp.rev\n        row[\"typ\"] = \"branch_base\" if is_base else \"branch_commit\"\n        row[\"parent\"] = \"\"\n        if exp_range.executor:\n            row[\"State\"] = 
exp_range.executor.state.capitalize()\n            if exp_range.executor.name:\n                row[\"Executor\"] = exp_range.executor.name.capitalize()\n        if exp.data:\n            row[\"Created\"] = format_time(\n                exp.data.timestamp, fill_value=fill_value, **kwargs\n            )\n            row.update(_data_cells(exp, fill_value=fill_value, **kwargs))\n        yield tuple(row.values())\n\n\ndef _data_cells(\n    exp: \"ExpState\",\n    *,\n    metrics_headers: Iterable[str],\n    params_headers: Iterable[str],\n    metrics_names: Mapping[str, Iterable[str]],\n    params_names: Mapping[str, Iterable[str]],\n    deps_names: Iterable[str],\n    fill_value: Optional[str] = \"-\",\n    error_value: str = \"!\",\n    precision: Optional[int] = None,\n    **kwargs,\n) -> Iterator[tuple[str, \"CellT\"]]:\n    def _d_cells(\n        d: Mapping[str, Any],\n        names: Mapping[str, Iterable[str]],\n        headers: Iterable[str],\n    ) -> Iterator[tuple[str, \"CellT\"]]:\n        from dvc.compare import _format_field, with_value\n\n        for fname, data in d.items():\n            item = data.get(\"data\", {})\n            item = flatten(item) if isinstance(item, dict) else {fname: item}\n            for name in names[fname]:\n                value = with_value(\n                    item.get(name),\n                    error_value if data.get(\"error\") else fill_value,\n                )\n                # wrap field data in ui.rich_text, otherwise rich may\n                # interpret unescaped braces from list/dict types as rich\n                # markup tags\n                value = ui.rich_text(str(_format_field(value, precision)))\n                if name in headers:\n                    yield name, value\n                else:\n                    yield f\"{fname}:{name}\", value\n\n    if not exp.data:\n        return\n    yield from _d_cells(exp.data.metrics, metrics_names, metrics_headers)\n    yield from _d_cells(exp.data.params, 
params_names, params_headers)\n    for name in deps_names:\n        dep = exp.data.deps.get(name)\n        if dep:\n            yield name, dep.hash or fill_value\n\n\ndef format_time(\n    timestamp: Optional[datetime],\n    fill_value: Optional[str] = \"-\",\n    iso: bool = False,\n    **kwargs,\n) -> Optional[str]:\n    if not timestamp:\n        return fill_value\n    if iso:\n        return timestamp.isoformat()\n    if timestamp.date() == date.today():  # noqa: DTZ011\n        fmt = \"%I:%M %p\"\n    else:\n        fmt = \"%b %d, %Y\"\n    return timestamp.strftime(fmt)\n\n\nclass _DataNames(NamedTuple):\n    # NOTE: we use nested dict instead of set for metrics/params names to\n    # preserve key ordering\n    metrics: dict[str, dict[str, Any]]\n    params: dict[str, dict[str, Any]]\n    deps: set[str]\n\n    @property\n    def sorted_deps(self):\n        return sorted(self.deps)\n\n    def update(self, other: \"_DataNames\"):\n        def _update_d(\n            d: dict[str, dict[str, Any]], other_d: Mapping[str, Mapping[str, Any]]\n        ):\n            for k, v in other_d.items():\n                if k in d:\n                    d[k].update(v)\n                else:\n                    d[k] = dict(v)\n\n        _update_d(self.metrics, other.metrics)\n        _update_d(self.params, other.params)\n        self.deps.update(other.deps)\n\n\ndef _collect_names(exp_states: Iterable[\"ExpState\"]) -> _DataNames:\n    result = _DataNames(defaultdict(dict), defaultdict(dict), set())\n\n    def _collect_d(result_d: dict[str, dict[str, Any]], data_d: dict[str, Any]):\n        for path, item in data_d.items():\n            item = item.get(\"data\", {})\n            if isinstance(item, dict):\n                item = flatten(item)\n                result_d[path].update((key, None) for key in item)\n\n    for exp in exp_states:\n        if exp.data:\n            _collect_d(result.metrics, exp.data.metrics)\n            _collect_d(result.params, exp.data.params)\n    
        result.deps.update(exp.data.deps)\n        if exp.experiments:\n            for child in exp.experiments:\n                result.update(_collect_names(child.revs))\n\n    return result\n\n\ndef _normalize_headers(\n    names: Mapping[str, Mapping[str, Any]], count: Mapping[str, int]\n) -> list[str]:\n    return [\n        name if count[name] == 1 else f\"{path}:{name}\"\n        for path in names\n        for name in names[path]\n    ]\n"
  },
  {
    "path": "dvc/repo/experiments/stash.py",
    "content": "import re\nfrom collections.abc import Iterable, Iterator\nfrom contextlib import contextmanager\nfrom typing import NamedTuple, Optional\n\nfrom scmrepo.git import Stash\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc_objects.fs.local import localfs\nfrom dvc_objects.fs.utils import as_atomic\n\nfrom .refs import APPLY_HEAD, APPLY_STASH\n\nlogger = logger.getChild(__name__)\n\n\nclass ExpStashEntry(NamedTuple):\n    \"\"\"Experiment stash entry.\n\n    stash_index: Stash index for this entry. Can be None if this commit\n        is not pushed onto the stash ref.\n    head_rev: HEAD Git commit to be checked out for this experiment.\n    baseline_rev: Experiment baseline commit.\n    branch: Optional branch name for this experiment.\n    name: Optional exp name.\n    \"\"\"\n\n    stash_index: Optional[int]\n    head_rev: str\n    baseline_rev: str\n    branch: Optional[str]\n    name: Optional[str]\n\n\nclass ExpStash(Stash):\n    MESSAGE_FORMAT = \"dvc-exp:{rev}:{baseline_rev}:{name}\"\n    MESSAGE_RE = re.compile(\n        r\"(?:commit: )\"\n        r\"dvc-exp:(?P<rev>[0-9a-f]+):(?P<baseline_rev>[0-9a-f]+)\"\n        r\":(?P<name>[^~^:\\\\?\\[\\]*]*)\"\n        r\"(:(?P<branch>.+))?$\"\n    )\n\n    @property\n    def stash_revs(self) -> dict[str, ExpStashEntry]:\n        revs = {}\n        for i, entry in enumerate(self):\n            msg = entry.message.decode(\"utf-8\").strip()\n            m = self.MESSAGE_RE.match(msg)\n            if m:\n                revs[entry.new_sha.decode(\"utf-8\")] = ExpStashEntry(\n                    i,\n                    m.group(\"rev\"),\n                    m.group(\"baseline_rev\"),\n                    m.group(\"branch\"),\n                    m.group(\"name\"),\n                )\n        return revs\n\n    @classmethod\n    def format_message(\n        cls,\n        rev: str,\n        baseline_rev: str,\n        name: Optional[str] = None,\n        branch: Optional[str] = 
None,\n    ) -> str:\n        msg = cls.MESSAGE_FORMAT.format(\n            rev=rev, baseline_rev=baseline_rev, name=name if name else \"\"\n        )\n        branch_msg = f\":{branch}\" if branch else \"\"\n        return f\"{msg}{branch_msg}\"\n\n    def remove_revs(self, stash_revs: Iterable[ExpStashEntry]):\n        \"\"\"Remove the specified entries from the queue by stash revision.\"\"\"\n        for index in sorted(\n            (\n                entry.stash_index\n                for entry in stash_revs\n                if entry.stash_index is not None\n            ),\n            reverse=True,\n        ):\n            self.drop(index)\n\n\nclass ApplyStashEntry(NamedTuple):\n    \"\"\"Apply stash entry.\n\n    stash_index: Stash index for this entry. Can be None if this commit\n        is not pushed onto the stash ref.\n    head_rev: HEAD Git commit prior to exp apply.\n    rev: Applied experiment commit.\n    name: Optional applied exp name.\n    \"\"\"\n\n    stash_index: Optional[int]\n    head_rev: str\n    rev: str\n    name: Optional[str]\n\n\nclass ApplyStash(Stash):\n    DEFAULT_STASH = APPLY_STASH\n    MESSAGE_FORMAT = \"dvc-exp-apply:{head_rev}:{rev}:{name}\"\n    MESSAGE_RE = re.compile(\n        r\"(?:commit: )\"\n        r\"dvc-exp-apply:(?P<head_rev>[0-9a-f]+):(?P<rev>[0-9a-f]+)\"\n        r\":(?P<name>[^~^:\\\\?\\[\\]*]*)\"\n    )\n\n    @property\n    def stash_revs(self) -> dict[str, ApplyStashEntry]:\n        revs = {}\n        for i, entry in enumerate(self):\n            msg = entry.message.decode(\"utf-8\").strip()\n            m = self.MESSAGE_RE.match(msg)\n            if m:\n                revs[entry.new_sha.decode(\"utf-8\")] = ApplyStashEntry(\n                    i, m.group(\"head_rev\"), m.group(\"rev\"), m.group(\"name\")\n                )\n        return revs\n\n    @classmethod\n    def format_message(cls, head_rev: str, rev: str, name: Optional[str] = None) -> str:\n        return cls.MESSAGE_FORMAT.format(\n            
head_rev=head_rev, rev=rev, name=name if name else \"\"\n        )\n\n    @contextmanager\n    def preserve_workspace(\n        self, rev: str, name: Optional[str] = None\n    ) -> Iterator[Optional[str]]:\n        if len(self):\n            logger.debug(\"Clearing existing exp-apply stash\")\n            self.clear()\n        head = self.scm.get_rev()\n        self.scm.set_ref(APPLY_HEAD, head)\n        message = self.format_message(head, rev, name=name)\n        stash_rev = self.push(message=message, include_untracked=True)\n        try:\n            yield stash_rev\n            if stash_rev:\n                self._apply_difference(stash_rev, rev)\n        except Exception:\n            self.revert_workspace()\n            raise\n\n    def _apply_difference(self, stash_rev: str, rev: str):\n        \"\"\"Selectively apply changes from stash_rev.\n\n        Only changes to files from left which do not exist in right will be applied.\n        \"\"\"\n        self._copy_difference(stash_rev, rev)\n        commit = self.scm.resolve_commit(stash_rev)\n        for parent_rev in commit.parents:\n            parent_commit = self.scm.resolve_commit(parent_rev)\n            if parent_commit.message.startswith(\"untracked files on \"):\n                self._copy_difference(parent_rev, rev)\n\n    def _copy_difference(self, left_rev: str, right_rev: str):\n        left_fs = self.scm.get_fs(left_rev)\n        right_fs = self.scm.get_fs(right_rev)\n        paths = [path for path in left_fs.find(\"/\") if not right_fs.exists(path)]\n        dest_paths = [\n            localfs.join(self.scm.root_dir, left_fs.relpath(path, \"/\"))\n            for path in paths\n        ]\n        for src, dest in zip(paths, dest_paths):\n            with as_atomic(localfs, dest, create_parents=True) as tmp_file:\n                left_fs.get_file(src, tmp_file)\n\n    def revert_workspace(self):\n        apply_head = self.scm.get_ref(self.ref)\n        head = self.scm.get_rev()\n        if 
apply_head != head:\n            raise DvcException(\n                f\"Cannot revert workspace, current HEAD '{head[:7]}' does not match the\"\n                f\" pre-apply HEAD '{apply_head[:7]}'\"\n            )\n        self.scm.reset(hard=True)\n        if len(self):\n            # In the event that the apply-stash and current workspace contain\n            # conflicting untracked files, we do:\n            #   1. stash the current untracked files\n            #   2. restore/pop the apply-stash (with untracked files)\n            #   3. restore/pop the untracked files from (1) and ignore any conflicts\n            #      (forcefully reverting to the apply-stash version)\n            workspace_rev = self.scm.stash.push(include_untracked=True)\n            try:\n                self.pop()\n            finally:\n                if workspace_rev:\n                    self.scm.stash.pop(skip_conflicts=True)\n        self.scm.remove_ref(self.ref)\n"
  },
  {
    "path": "dvc/repo/experiments/utils.py",
    "content": "import os\nimport random\nimport sys\nfrom collections import defaultdict\nfrom collections.abc import Generator, Iterable, Mapping, Sequence\nfrom functools import wraps\nfrom typing import TYPE_CHECKING, Callable, Optional, Union\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.repo.experiments.exceptions import AmbiguousExpRefInfo\nfrom dvc.rwlock import rwlock\nfrom dvc.scm import Git\n\nfrom .refs import (\n    EXEC_APPLY,\n    EXEC_BASELINE,\n    EXEC_BRANCH,\n    EXPS_NAMESPACE,\n    ITER_SKIP_NAMESPACES,\n    STASHES,\n    ExpRefInfo,\n)\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.scm import NoSCM\n\n\nEXEC_TMP_DIR = \"exps\"\nEXEC_PID_DIR = \"run\"\n\n\ndef get_exp_rwlock(\n    repo: \"Repo\",\n    reads: Optional[list[str]] = None,\n    writes: Optional[list[str]] = None,\n):\n    reads = reads or []\n    writes = writes or []\n\n    cmd = \" \".join(sys.argv)\n    assert repo.tmp_dir is not None\n    path = os.path.join(repo.tmp_dir, EXEC_TMP_DIR)\n    repo.fs.makedirs(path, exist_ok=True)\n\n    return rwlock(\n        path,\n        repo.fs,\n        cmd,\n        reads,\n        writes,\n        repo.config[\"core\"].get(\"hardlink_lock\", False),\n    )\n\n\ndef unlocked_repo(f):\n    @wraps(f)\n    def wrapper(exp, *args, **kwargs):\n        exp.repo.lock.unlock()\n        exp.repo._reset()\n        try:\n            ret = f(exp, *args, **kwargs)\n        finally:\n            exp.repo.lock.lock()\n        return ret\n\n    return wrapper\n\n\ndef _ignore_ref(ref: str) -> bool:\n    return (\n        any(ref.startswith(namespace) for namespace in ITER_SKIP_NAMESPACES)\n        or ref in STASHES\n    )\n\n\ndef exp_refs(\n    scm: \"Git\", url: Optional[str] = None\n) -> Generator[\"ExpRefInfo\", None, None]:\n    \"\"\"Iterate over all experiment refs.\"\"\"\n    ref_gen = (\n        iter_remote_refs(scm, url, base=EXPS_NAMESPACE)\n        if url\n        else scm.iter_refs(base=EXPS_NAMESPACE)\n    
)\n    for ref in ref_gen:\n        if _ignore_ref(ref):\n            continue\n        yield ExpRefInfo.from_ref(ref)\n\n\ndef exp_refs_by_rev(scm: \"Git\", rev: str) -> Generator[ExpRefInfo, None, None]:\n    \"\"\"Iterate over all experiment refs pointing to the specified revision.\"\"\"\n    for ref in scm.get_refs_containing(rev, EXPS_NAMESPACE):\n        if not _ignore_ref(ref):\n            yield ExpRefInfo.from_ref(ref)\n\n\ndef exp_refs_by_baseline(\n    scm: \"Git\",\n    revs: Optional[set[str]] = None,\n    url: Optional[str] = None,\n) -> Mapping[str, list[ExpRefInfo]]:\n    \"\"\"Iterate over all experiment refs with the specified baseline.\"\"\"\n    all_exp_refs = exp_refs(scm, url)\n    result = defaultdict(list)\n    for ref in all_exp_refs:\n        if revs is None or ref.baseline_sha in revs:\n            result[ref.baseline_sha].append(ref)\n    return result\n\n\ndef iter_remote_refs(scm: \"Git\", url: str, base: Optional[str] = None, **kwargs):\n    from scmrepo.exceptions import AuthError, InvalidRemote\n\n    from dvc.scm import GitAuthError, InvalidRemoteSCMRepo\n\n    try:\n        yield from scm.iter_remote_refs(url, base=base, **kwargs)\n    except InvalidRemote as exc:\n        raise InvalidRemoteSCMRepo(str(exc))  # noqa: B904\n    except AuthError as exc:\n        raise GitAuthError(str(exc))  # noqa: B904\n\n\ndef push_refspec(\n    scm: \"Git\",\n    url: str,\n    push_list: Sequence[tuple[Optional[str], str]],\n    force: bool = False,\n    on_diverged: Optional[Callable[[str, str], bool]] = None,\n    **kwargs,\n):\n    from scmrepo.exceptions import AuthError\n    from scmrepo.git.backend.base import SyncStatus\n\n    from dvc.scm import GitAuthError, SCMError\n\n    refspecs = []\n    for src, dest in push_list:\n        if not src:\n            refspecs.append(f\":{dest}\")\n        elif src.endswith(\"/\"):\n            dest = dest.rstrip(\"/\") + \"/\"\n            for ref in scm.iter_refs(base=src):\n                
refname = ref.split(\"/\")[-1]\n                refspecs.append(f\"{ref}:{dest}{refname}\")\n        elif dest.endswith(\"/\"):\n            refname = src.split(\"/\")[-1]\n            refspecs.append(f\"{src}:{dest}/{refname}\")\n        else:\n            refspecs.append(f\"{src}:{dest}\")\n\n    try:\n        results = scm.push_refspecs(\n            url, refspecs, force=force, on_diverged=on_diverged, **kwargs\n        )\n        diverged = [ref for ref in results if results[ref] == SyncStatus.DIVERGED]\n\n        if diverged:\n            raise SCMError(f\"local ref '{diverged}' diverged from remote '{url}'\")\n    except AuthError as exc:\n        raise GitAuthError(str(exc))  # noqa: B904\n\n\ndef remote_exp_refs(scm: \"Git\", url: str) -> Generator[ExpRefInfo, None, None]:\n    \"\"\"Iterate over all remote experiment refs.\"\"\"\n    for ref in iter_remote_refs(scm, url, base=EXPS_NAMESPACE):\n        if _ignore_ref(ref):\n            continue\n        yield ExpRefInfo.from_ref(ref)\n\n\ndef exp_refs_by_names(\n    scm: \"Git\", names: set[str], url: Optional[str] = None\n) -> dict[str, list[ExpRefInfo]]:\n    \"\"\"Iterate over all experiment refs matching the specified names.\"\"\"\n    resolve_results = defaultdict(list)\n    ref_info_gen = exp_refs(scm, url)\n    for ref_info in ref_info_gen:\n        if ref_info.name in names:\n            resolve_results[ref_info.name].append(ref_info)\n\n    return resolve_results\n\n\ndef remote_exp_refs_by_baseline(\n    scm: \"Git\", url: str, rev: str\n) -> Generator[ExpRefInfo, None, None]:\n    \"\"\"Iterate over all remote experiment refs with the specified baseline.\"\"\"\n    ref_info = ExpRefInfo(baseline_sha=rev)\n    for ref in iter_remote_refs(scm, url, base=str(ref_info)):\n        if _ignore_ref(ref):\n            continue\n        yield ExpRefInfo.from_ref(ref)\n\n\ndef exp_commits(\n    scm: \"Git\", ref_infos: Optional[Iterable[ExpRefInfo]] = None\n) -> Iterable[str]:\n    \"\"\"Iterate over all 
experiment commits.\"\"\"\n    shas: set[str] = set()\n    refs = ref_infos if ref_infos else exp_refs(scm)\n    for ref_info in refs:\n        shas.update(scm.branch_revs(str(ref_info), ref_info.baseline_sha))\n    yield from shas\n\n\ndef remove_exp_refs(scm: \"Git\", ref_infos: Iterable[ExpRefInfo]):\n    exec_branch = scm.get_ref(EXEC_BRANCH, follow=False)\n    exec_apply = scm.get_ref(EXEC_APPLY)\n\n    for ref_info in ref_infos:\n        ref = scm.get_ref(str(ref_info))\n        if exec_branch and str(ref_info):\n            scm.remove_ref(EXEC_BRANCH)\n        if exec_apply and exec_apply == ref:\n            scm.remove_ref(EXEC_APPLY)\n        scm.remove_ref(str(ref_info))\n\n\ndef fix_exp_head(scm: Union[\"Git\", \"NoSCM\"], ref: Optional[str]) -> Optional[str]:\n    if ref:\n        name, tail = Git.split_ref_pattern(ref)\n        if name == \"HEAD\" and scm.get_ref(EXEC_BASELINE):\n            return f\"{EXEC_BASELINE}{tail}\"\n    return ref\n\n\ndef resolve_name(\n    scm: \"Git\",\n    exp_names: Union[Iterable[str], str],\n    git_remote: Optional[str] = None,\n) -> dict[str, Optional[ExpRefInfo]]:\n    \"\"\"find the ref_info of specified names.\"\"\"\n    if isinstance(exp_names, str):\n        exp_names = [exp_names]\n\n    result = {}\n    unresolved = set()\n    for exp_name in exp_names:\n        if exp_name.startswith(\"refs/\"):\n            result[exp_name] = ExpRefInfo.from_ref(exp_name)\n        else:\n            unresolved.add(exp_name)\n\n    unresolved_result = exp_refs_by_names(scm, unresolved, git_remote)\n    cur_rev = scm.get_rev()\n    for name in unresolved:\n        ref_info_list = unresolved_result[name]\n        if not ref_info_list:\n            result[name] = None\n        elif len(ref_info_list) == 1:\n            result[name] = ref_info_list[0]\n        else:\n            for ref_info in ref_info_list:\n                if ref_info.baseline_sha == cur_rev:\n                    result[name] = ref_info\n                    
break\n            else:\n                raise AmbiguousExpRefInfo(name, ref_info_list)\n    return result\n\n\ndef check_ref_format(scm: \"Git\", ref: ExpRefInfo):\n    # \"/\" forbidden, only in dvc exp as we didn't support it for now.\n    if not scm.check_ref_format(str(ref)) or \"/\" in ref.name:\n        raise InvalidArgumentError(\n            f\"Invalid exp name {ref.name}, the exp name must follow rules in \"\n            \"https://git-scm.com/docs/git-check-ref-format\"\n        )\n\n\ndef fetch_all_exps(scm: \"Git\", url: str, progress: Optional[Callable] = None, **kwargs):\n    refspecs = [\n        f\"{ref}:{ref}\"\n        for ref in iter_remote_refs(scm, url, base=EXPS_NAMESPACE)\n        if not _ignore_ref(ref)\n    ]\n    scm.fetch_refspecs(url, refspecs, progress=progress, **kwargs)\n\n\ndef gen_random_name():\n    # fmt: off\n    NOUNS = ('abac', 'abbs', 'aces', 'acid', 'acne', 'acre', 'acts', 'ados', 'adze', 'afro', 'agas', 'aged', 'ages', 'agio', 'agma', 'airs', 'airt', 'aits', 'akes', 'alap', 'albs', 'alga', 'ally', 'alto', 'amah', 'ambo', 'amie', 'amyl', 'ankh', 'apex', 'aqua', 'arcs', 'areg', 'aria', 'aril', 'arks', 'army', 'auks', 'aune', 'aura', 'awls', 'awns', 'axon', 'azan', 'baby', 'bade', 'bael', 'bags', 'bait', 'ball', 'banc', 'bang', 'bani', 'barb', 'bark', 'bate', 'bats', 'bawl', 'beak', 'bean', 'beep', 'belt', 'berk', 'beth', 'bias', 'bice', 'bids', 'bind', 'bise', 'bish', 'bite', 'boar', 'boat', 'body', 'boff', 'bold', 'boll', 'bolo', 'bomb', 'bond', 'book', 'boor', 'boot', 'bort', 'bosk', 'bots', 'bott', 'bout', 'bras', 'bree', 'brig', 'brio', 'buck', 'buhl', 'bump', 'bunk', 'bunt', 'buoy', 'byes', 'byte', 'cane', 'cant', 'caps', 'care', 'cart', 'cats', 'cedi', 'ceps', 'cere', 'chad', 'cham', 'chat', 'chay', 'chic', 'chin', 'chis', 'chiv', 'choc', 'chow', 'chum', 'ciao', 'cigs', 'clay', 'clip', 'clog', 'coal', 'coat', 'code', 'coed', 'cogs', 'coho', 'cole', 'cols', 'colt', 'conk', 'cons', 'cony', 'coof', 'cook', 'cool', 'coos', 
'corm', 'cors', 'coth', 'cows', 'coze', 'crag', 'craw', 'cree', 'crib', 'cuds', 'cull', 'cult', 'curb', 'curn', 'curs', 'cusp', 'cuss', 'cwms', 'cyma', 'cyst', 'dabs', 'dado', 'daff', 'dais', 'daks', 'damn', 'dams', 'darg', 'dart', 'data', 'dawk', 'dawn', 'daws', 'daze', 'dean', 'debs', 'debt', 'deep', 'dees', 'dele', 'delf', 'dent', 'deys', 'dhow', 'digs', 'dirk', 'dita', 'diva', 'divs', 'doek', 'doge', 'dogs', 'dogy', 'dohs', 'doit', 'dole', 'doll', 'dolt', 'dona', 'dook', 'door', 'dops', 'doss', 'doxy', 'drab', 'drop', 'drum', 'duad', 'duct', 'duff', 'duke', 'dunk', 'dunt', 'ears', 'ease', 'eggs', 'eild', 'emeu', 'emus', 'envy', 'epha', 'eric', 'erns', 'esne', 'esse', 'ewes', 'expo', 'eyas', 'eyot', 'eyry', 'fare', 'farl', 'farm', 'feds', 'feel', 'fees', 'feme', 'fess', 'fibs', 'fids', 'fils', 'firm', 'fish', 'flab', 'flap', 'flea', 'flew', 'flex', 'flip', 'flit', 'flus', 'flux', 'foil', 'fond', 'food', 'fool', 'ford', 'fore', 'frit', 'friz', 'froe', 'funs', 'furl', 'fuss', 'fuzz', 'gaby', 'gaff', 'gale', 'gang', 'gaol', 'gape', 'gash', 'gaur', 'gaze', 'gear', 'genu', 'gest', 'geum', 'ghat', 'gigs', 'gimp', 'gird', 'girl', 'glee', 'glen', 'glia', 'glop', 'gnat', 'goad', 'goaf', 'gobs', 'gonk', 'good', 'goos', 'gore', 'gram', 'gray', 'grig', 'grip', 'grot', 'grub', 'gude', 'gula', 'gulf', 'guns', 'gust', 'gyms', 'gyro', 'hack', 'haet', 'hajj', 'hake', 'half', 'halm', 'hard', 'harl', 'hask', 'hate', 'heck', 'heel', 'heir', 'help', 'hems', 'here', 'hill', 'hips', 'hits', 'hobo', 'hock', 'hogs', 'hold', 'holy', 'hood', 'hoot', 'hope', 'horn', 'hose', 'hour', 'hows', 'huck', 'hugs', 'huia', 'hulk', 'hull', 'hunk', 'hunt', 'huts', 'hymn', 'ibex', 'ices', 'iglu', 'impi', 'inks', 'inti', 'ions', 'iota', 'iron', 'jabs', 'jags', 'jake', 'jass', 'jato', 'jaws', 'jean', 'jeer', 'jerk', 'jest', 'jiao', 'jigs', 'jill', 'jinn', 'jird', 'jive', 'jock', 'joey', 'jogs', 'joss', 'jota', 'jots', 'juba', 'jube', 'judo', 'jump', 'junk', 'jura', 'juts', 'jynx', 'kago', 'kail', 'kaka', 
'kale', 'kana', 'keek', 'keep', 'kefs', 'kegs', 'kerf', 'kern', 'keys', 'kibe', 'kick', 'kids', 'kifs', 'kill', 'kina', 'kind', 'kine', 'kite', 'kiwi', 'knap', 'knit', 'koas', 'kobs', 'kyat', 'lack', 'lahs', 'lair', 'lama', 'lamb', 'lame', 'lats', 'lava', 'lays', 'leaf', 'leak', 'leas', 'lees', 'leks', 'leno', 'libs', 'lich', 'lick', 'lien', 'lier', 'lieu', 'life', 'lift', 'limb', 'line', 'link', 'linn', 'lira', 'loft', 'loge', 'loir', 'long', 'loof', 'look', 'loot', 'lore', 'loss', 'lots', 'loup', 'love', 'luce', 'ludo', 'luke', 'lulu', 'lure', 'lush', 'magi', 'maid', 'main', 'mako', 'male', 'mana', 'many', 'mart', 'mash', 'mast', 'mate', 'math', 'mats', 'matt', 'maul', 'maya', 'mays', 'meal', 'mean', 'meed', 'mela', 'mene', 'mere', 'merk', 'mesh', 'mete', 'mice', 'milo', 'mime', 'mina', 'mine', 'mirk', 'miss', 'mobs', 'moit', 'mold', 'molt', 'mome', 'moms', 'monk', 'moot', 'mope', 'more', 'morn', 'mows', 'moxa', 'much', 'mung', 'mush', 'muss', 'myth', 'name', 'nard', 'nark', 'nave', 'navy', 'neck', 'newt', 'nibs', 'nims', 'nine', 'nock', 'noil', 'noma', 'nosh', 'nowt', 'nuke', 'oafs', 'oast', 'oats', 'obit', 'odor', 'okra', 'omer', 'oner', 'ones', 'orcs', 'ords', 'orfe', 'orle', 'ossa', 'outs', 'over', 'owls', 'pail', 'pall', 'palp', 'pams', 'pang', 'pans', 'pant', 'paps', 'pate', 'pats', 'paws', 'pear', 'peba', 'pech', 'pecs', 'peel', 'peer', 'pees', 'pein', 'peri', 'phon', 'pice', 'pita', 'pith', 'play', 'plop', 'plot', 'plow', 'plug', 'plum', 'polo', 'pomp', 'pond', 'pons', 'pony', 'poof', 'pope', 'poss', 'pots', 'pour', 'prad', 'prat', 'prep', 'prob', 'prof', 'prow', 'puck', 'puds', 'puke', 'puku', 'pump', 'puns', 'pupa', 'purl', 'pyre', 'quad', 'quay', 'quey', 'quiz', 'raid', 'rail', 'rain', 'raja', 'rale', 'rams', 'rand', 'rant', 'raps', 'rasp', 'razz', 'rede', 'reef', 'reif', 'rein', 'repp', 'rial', 'ribs', 'rick', 'rift', 'rill', 'rime', 'rims', 'ring', 'rins', 'rise', 'rite', 'rits', 'roam', 'robe', 'rods', 'roma', 'rook', 'rort', 'rotl', 'roup', 'roux', 
'rube', 'rubs', 'ruby', 'rues', 'rugs', 'ruin', 'runs', 'ryas', 'sack', 'sacs', 'saga', 'sail', 'sale', 'salp', 'salt', 'sand', 'sang', 'sash', 'saut', 'says', 'scab', 'scow', 'scud', 'scup', 'scut', 'seal', 'seam', 'sech', 'seed', 'seep', 'seer', 'self', 'sena', 'send', 'sera', 'sere', 'shad', 'shah', 'sham', 'shay', 'shes', 'ship', 'shoe', 'sick', 'sida', 'sign', 'sike', 'sima', 'sine', 'sing', 'sinh', 'sink', 'sins', 'site', 'size', 'skat', 'skin', 'skip', 'skis', 'slaw', 'sled', 'slew', 'sley', 'slob', 'slue', 'slug', 'smut', 'snap', 'snib', 'snip', 'snob', 'snog', 'snot', 'snow', 'snub', 'snug', 'soft', 'soja', 'soke', 'song', 'sons', 'sook', 'sorb', 'sori', 'souk', 'soul', 'sous', 'soya', 'spit', 'stay', 'stew', 'stir', 'stob', 'stud', 'suds', 'suer', 'suit', 'sumo', 'sums', 'sups', 'suqs', 'suss', 'sway', 'syce', 'synd', 'taal', 'tach', 'taco', 'tads', 'taka', 'tale', 'tamp', 'tams', 'tang', 'tans', 'tape', 'tare', 'taro', 'tarp', 'tart', 'tass', 'taus', 'teat', 'teds', 'teff', 'tegu', 'tell', 'term', 'thar', 'thaw', 'tics', 'tier', 'tiff', 'tils', 'tilt', 'tint', 'tipi', 'tire', 'tirl', 'toby', 'tods', 'toea', 'toff', 'toga', 'toil', 'toke', 'tola', 'tole', 'tomb', 'toms', 'torc', 'tors', 'tort', 'tosh', 'tote', 'tret', 'trey', 'trio', 'trug', 'tuck', 'tugs', 'tule', 'tune', 'tuns', 'tuts', 'tyke', 'tyne', 'typo', 'ulna', 'umbo', 'unau', 'unit', 'upas', 'user', 'uvea', 'vacs', 'vane', 'vang', 'vans', 'vara', 'vase', 'veep', 'veer', 'vega', 'veil', 'vela', 'vent', 'vies', 'view', 'vina', 'vine', 'vise', 'vlei', 'volt', 'vows', 'wads', 'waft', 'wage', 'wain', 'walk', 'want', 'wart', 'wave', 'waws', 'weal', 'wean', 'weds', 'weep', 'weft', 'weir', 'weka', 'weld', 'wens', 'weys', 'whap', 'whey', 'whin', 'whit', 'whop', 'wide', 'wife', 'wind', 'wine', 'wino', 'wins', 'wire', 'wise', 'woes', 'wont', 'wool', 'work', 'worm', 'wort', 'yack', 'yank', 'yapp', 'yard', 'yate', 'yawl', 'yegg', 'yell', 'yeuk', 'yews', 'yips', 'yobs', 'yogi', 'yoke', 'yolk', 'yoni', 'zack', 
'zags', 'zest', 'zhos', 'zigs', 'zila', 'zips', 'ziti', 'zoea', 'zone', 'zoon')  # noqa: E501, Q000, N806\n    ADJECTIVES = ('about', 'above', 'abuzz', 'acerb', 'acold', 'acred', 'added', 'addle', 'adept', 'adult', 'adunc', 'adust', 'afoul', 'after', 'agape', 'agaze', 'agile', 'aging', 'agley', 'aglow', 'ahead', 'ahull', 'aided', 'alary', 'algal', 'alike', 'alive', 'alone', 'aloof', 'alpha', 'amber', 'amiss', 'amort', 'ample', 'amuck', 'angry', 'anile', 'apeak', 'apish', 'arced', 'areal', 'armed', 'aroid', 'ashen', 'aspen', 'astir', 'atilt', 'atrip', 'aulic', 'aural', 'awash', 'awful', 'awing', 'awned', 'axile', 'azoic', 'azure', 'baggy', 'baked', 'balky', 'bally', 'balmy', 'banal', 'bandy', 'bardy', 'bared', 'barer', 'barky', 'basal', 'based', 'baser', 'basic', 'batty', 'bawdy', 'beady', 'beaky', 'beamy', 'beaut', 'beefy', 'beery', 'beige', 'bendy', 'bifid', 'bijou', 'biped', 'birch', 'bitty', 'blame', 'bland', 'blank', 'blear', 'blest', 'blind', 'blond', 'blown', 'blowy', 'bluer', 'bluff', 'blunt', 'boned', 'bonny', 'boozy', 'bored', 'boric', 'bosky', 'bosom', 'bound', 'bovid', 'bowed', 'boxed', 'braky', 'brash', 'brief', 'briny', 'brisk', 'broad', 'broch', 'brood', 'brown', 'brute', 'buggy', 'bulgy', 'bumpy', 'burly', 'burnt', 'burry', 'bushy', 'busty', 'butch', 'buxom', 'cadgy', 'cagey', 'calmy', 'campy', 'canny', 'caped', 'cased', 'catty', 'cauld', 'cedar', 'cered', 'ceric', 'chary', 'cheap', 'cheek', 'chewy', 'chief', 'chill', 'chirk', 'choky', 'cissy', 'civil', 'cleft', 'coaly', 'color', 'comfy', 'comic', 'compo', 'conic', 'couth', 'coxal', 'crack', 'crank', 'crash', 'crass', 'crisp', 'cronk', 'cross', 'crude', 'cruel', 'crumb', 'cured', 'curly', 'curst', 'cushy', 'cutty', 'cynic', 'dated', 'dazed', 'dedal', 'deism', 'diazo', 'dicey', 'dingy', 'direr', 'dirty', 'dishy', 'dizzy', 'dolce', 'doped', 'dopey', 'dormy', 'dorty', 'dosed', 'dotal', 'dotty', 'dowdy', 'dowie', 'downy', 'dozen', 'drawn', 'dread', 'drear', 'dress', 'dried', 'ducky', 'duddy', 'dummy', 
'dumpy', 'duple', 'dural', 'dusky', 'dusty', 'dutch', 'dying', 'eager', 'eaten', 'ebony', 'edged', 'eerie', 'eight', 'elder', 'elect', 'elfin', 'elite', 'empty', 'enate', 'enemy', 'epoxy', 'erect', 'ethic', 'every', 'extra', 'faced', 'faery', 'faint', 'famed', 'fancy', 'farci', 'fatal', 'fated', 'fatty', 'fazed', 'felon', 'fenny', 'ferny', 'fetal', 'fetid', 'fewer', 'fiery', 'fifty', 'filar', 'filmy', 'final', 'fined', 'finer', 'finny', 'fired', 'first', 'fishy', 'fixed', 'fizzy', 'flaky', 'flamy', 'flash', 'flawy', 'fleet', 'flory', 'flown', 'fluid', 'fluky', 'flush', 'focal', 'foggy', 'folio', 'forky', 'forte', 'forty', 'found', 'frail', 'frank', 'freed', 'freer', 'fresh', 'fried', 'front', 'frore', 'fuggy', 'funky', 'funny', 'furry', 'fusil', 'fussy', 'fuzzy', 'gabby', 'gamer', 'gamey', 'gamic', 'gammy', 'garni', 'gauge', 'gaunt', 'gauzy', 'gawky', 'gawsy', 'gemmy', 'genal', 'genic', 'ghast', 'gimpy', 'girly', 'glare', 'glary', 'glial', 'glued', 'gluey', 'godly', 'gooey', 'goofy', 'goosy', 'gouty', 'grade', 'grand', 'grapy', 'grave', 'gross', 'group', 'gruff', 'guest', 'gules', 'gulfy', 'gummy', 'gushy', 'gusty', 'gutsy', 'gutta', 'gypsy', 'gyral', 'hadal', 'hammy', 'handy', 'hardy', 'hasty', 'hated', 'hazel', 'heady', 'heapy', 'hefty', 'heigh', 'hempy', 'herby', 'hexed', 'hilly', 'hired', 'holey', 'honey', 'hooly', 'hoven', 'huger', 'hulky', 'humid', 'hunky', 'hyoid', 'idled', 'iliac', 'inane', 'incog', 'inert', 'inner', 'inter', 'iodic', 'ionic', 'irate', 'irony', 'itchy', 'jaggy', 'jammy', 'japan', 'jazzy', 'jerky', 'jetty', 'joint', 'jowly', 'juicy', 'jumpy', 'jural', 'kacha', 'kaput', 'kempt', 'keyed', 'kinky', 'known', 'kooky', 'kraal', 'laced', 'laigh', 'lairy', 'lamer', 'lardy', 'larky', 'lated', 'later', 'lathy', 'leady', 'leafy', 'leaky', 'leary', 'least', 'ledgy', 'leery', 'legal', 'leggy', 'lento', 'level', 'licht', 'licit', 'liege', 'light', 'liked', 'liney', 'lippy', 'lived', 'livid', 'loamy', 'loath', 'lobar', 'local', 'loony', 'loose', 'loral', 
'losel', 'lousy', 'loved', 'lower', 'lowly', 'lowse', 'loyal', 'lucid', 'lucky', 'lumpy', 'lunar', 'lurid', 'lushy', 'lying', 'lyric', 'macho', 'macro', 'magic', 'major', 'malar', 'mangy', 'manky', 'manly', 'mardy', 'massy', 'mated', 'matte', 'mauve', 'mazed', 'mealy', 'meaty', 'medal', 'melic', 'mesic', 'mesne', 'messy', 'metal', 'miffy', 'milky', 'mined', 'minim', 'minor', 'minus', 'mired', 'mirky', 'misty', 'mixed', 'modal', 'model', 'moire', 'molar', 'moldy', 'moody', 'moony', 'mopey', 'moral', 'mossy', 'mothy', 'motor', 'mousy', 'moved', 'mucid', 'mucky', 'muddy', 'muggy', 'muley', 'mural', 'murky', 'mushy', 'muted', 'muzzy', 'myoid', 'naggy', 'naive', 'naked', 'named', 'nasty', 'natal', 'naval', 'nervy', 'newsy', 'nicer', 'niffy', 'nifty', 'ninth', 'nitty', 'nival', 'noble', 'nodal', 'noisy', 'north', 'nosed', 'noted', 'nowed', 'nubby', 'oaken', 'oared', 'oaten', 'obese', 'ocher', 'ochre', 'often', 'ohmic', 'oiled', 'olden', 'older', 'oleic', 'olive', 'optic', 'ortho', 'osmic', 'other', 'outer', 'ovoid', 'owing', 'owned', 'paced', 'pagan', 'paled', 'paler', 'pally', 'paper', 'pappy', 'parky', 'party', 'pasty', 'pavid', 'pawky', 'peaky', 'pearl', 'peart', 'peaty', 'pedal', 'peppy', 'perdu', 'perky', 'pesky', 'phony', 'piano', 'picky', 'piled', 'piney', 'pious', 'pique', 'pithy', 'platy', 'plump', 'plush', 'podgy', 'potty', 'power', 'prest', 'pricy', 'prima', 'prime', 'print', 'privy', 'prize', 'prone', 'proof', 'prosy', 'proud', 'proxy', 'pseud', 'pucka', 'pudgy', 'puffy', 'pukka', 'pupal', 'purer', 'pursy', 'pushy', 'pyoid', 'quack', 'quare', 'quasi', 'quiet', 'quits', 'rabic', 'rabid', 'radio', 'raked', 'randy', 'rapid', 'rarer', 'raspy', 'rathe', 'ratty', 'ready', 'reedy', 'reeky', 'refer', 'regal', 'riant', 'ridgy', 'right', 'riled', 'rimed', 'rindy', 'risen', 'risky', 'ritzy', 'rival', 'riven', 'robed', 'rocky', 'roily', 'roman', 'rooky', 'ropey', 'round', 'rowdy', 'ruddy', 'ruled', 'rummy', 'runic', 'runny', 'runty', 'rural', 'rusty', 'rutty', 'sable', 
'salic', 'sandy', 'sappy', 'sarky', 'sassy', 'sated', 'saved', 'savvy', 'scald', 'scaly', 'scary', 'score', 'scrap', 'sedgy', 'seely', 'seral', 'sewed', 'shaky', 'sharp', 'sheen', 'shier', 'shill', 'shoal', 'shock', 'shoed', 'shore', 'short', 'shyer', 'silky', 'silly', 'silty', 'sixth', 'sixty', 'skint', 'slack', 'slant', 'sleek', 'slier', 'slimy', 'slung', 'small', 'smart', 'smoky', 'snaky', 'sneak', 'snide', 'snowy', 'snuff', 'soapy', 'sober', 'socko', 'solar', 'soled', 'solid', 'sonic', 'sooth', 'sooty', 'soppy', 'sorer', 'sound', 'soupy', 'spent', 'spicy', 'spiky', 'spiny', 'spiry', 'splay', 'split', 'sport', 'spumy', 'squat', 'staid', 'stiff', 'still', 'stoic', 'stone', 'stony', 'store', 'stout', 'straw', 'stray', 'strip', 'stung', 'suave', 'sudsy', 'sulfa', 'sulky', 'sunny', 'super', 'sural', 'surer', 'surfy', 'surgy', 'surly', 'swell', 'swept', 'swish', 'sworn', 'tabby', 'taboo', 'tacit', 'tacky', 'tamed', 'tamer', 'tangy', 'taped', 'tarot', 'tarry', 'tasty', 'tatty', 'taunt', 'tawie', 'teary', 'techy', 'telic', 'tenor', 'tense', 'tenth', 'tenty', 'tepid', 'terse', 'testy', 'third', 'tidal', 'tight', 'tiled', 'timid', 'tinct', 'tined', 'tippy', 'tipsy', 'tonal', 'toned', 'tonic', 'toric', 'total', 'tough', 'toxic', 'trade', 'treed', 'treen', 'trial', 'truer', 'tubal', 'tubby', 'tumid', 'tuned', 'tutti', 'twill', 'typal', 'typed', 'typic', 'umber', 'unapt', 'unbid', 'uncut', 'undue', 'undug', 'unfed', 'unfit', 'union', 'unlet', 'unmet', 'unwed', 'unwet', 'upper', 'upset', 'urban', 'utile', 'uveal', 'vagal', 'valid', 'vapid', 'varus', 'vatic', 'veiny', 'vital', 'vivid', 'vocal', 'vogie', 'volar', 'vying', 'wacky', 'wally', 'waney', 'warty', 'washy', 'waspy', 'waste', 'waugh', 'waxen', 'webby', 'wedgy', 'weeny', 'weepy', 'weest', 'weird', 'welsh', 'wersh', 'whist', 'white', 'whity', 'whole', 'wider', 'wight', 'winey', 'wired', 'wised', 'wiser', 'withy', 'wonky', 'woods', 'woozy', 'world', 'wormy', 'worse', 'worst', 'woven', 'wrath', 'wrier', 'wrong', 'wroth', 
'xeric', 'yarer', 'yolky', 'young', 'yucky', 'yummy', 'zesty', 'zingy', 'zinky', 'zippy', 'zonal')  # noqa: E501, Q000, N806\n    # fmt: on\n    # Use custom random generator to make sure that names are random even if\n    # global random seed is set (common for ML pipelines).\n    random_generator = random.Random()  # noqa: S311\n    adjective = random_generator.choice(ADJECTIVES)\n    noun = random_generator.choice(NOUNS)\n    return f\"{adjective}-{noun}\"\n\n\ndef get_random_exp_name(scm, baseline_rev):\n    while True:\n        name = gen_random_name()\n        exp_ref = ExpRefInfo(baseline_sha=baseline_rev, name=name)\n        if not scm.get_ref(str(exp_ref)):\n            return name\n\n\ndef to_studio_params(dvc_params):\n    \"\"\"Convert from internal DVC format to Studio format.\n\n    From:\n\n    {\n        \"workspace\": {\n            \"data\": {\n                \"params.yaml\": {\n                    \"data\": {\"foo\": 1}\n                }\n            }\n        }\n    }\n\n    To:\n\n    {\n        \"params.yaml\": {\"foo\": 1}\n    }\n    \"\"\"\n    result: dict = {}\n    if not dvc_params:\n        return result\n    for rev_data in dvc_params.values():\n        for file_name, file_data in rev_data.get(\"data\", {}).items():\n            result[file_name] = file_data.get(\"data\", {})\n\n    return result\n\n\ndef describe(\n    scm: \"Git\",\n    revs: Iterable[str],\n    logger,\n    refs: Optional[Iterable[str]] = None,\n) -> dict[str, Optional[str]]:\n    \"\"\"Describe revisions using a tag, branch.\n\n    The first matching name will be returned for each rev. 
Names are preferred in this\n    order:\n        - current branch (if rev matches HEAD and HEAD is a branch)\n        - tags\n        - branches\n\n    Returns:\n        Dict mapping revisions from revs to a name.\n    \"\"\"\n\n    head_rev = scm.get_rev()\n    head_ref = scm.get_ref(\"HEAD\", follow=False)\n    if head_ref and head_ref.startswith(\"refs/heads/\"):\n        head_branch = head_ref[len(\"refs/heads/\") :]\n    else:\n        head_branch = None\n\n    tags = {}\n    branches = {}\n    ref_it = iter(refs) if refs else scm.iter_refs()\n    for ref in ref_it:\n        is_tag = ref.startswith(\"refs/tags/\")\n        is_branch = ref.startswith(\"refs/heads/\")\n        if not (is_tag or is_branch):\n            continue\n        rev = scm.get_ref(ref)\n        if not rev:\n            logger.debug(\"unresolved ref %s\", ref)\n            continue\n        if is_tag and rev not in tags:\n            tags[rev] = ref[len(\"refs/tags/\") :]\n        if is_branch and rev not in branches:\n            branches[rev] = ref[len(\"refs/heads/\") :]\n\n    names: dict[str, Optional[str]] = {}\n    for rev in revs:\n        if rev == head_rev and head_branch:\n            names[rev] = head_branch\n        else:\n            names[rev] = tags.get(rev) or branches.get(rev)\n\n    return names\n"
  },
  {
    "path": "dvc/repo/fetch.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom dvc.exceptions import DownloadError\nfrom dvc.log import logger\nfrom dvc.stage.cache import RunCacheNotSupported\nfrom dvc.ui import ui\nfrom dvc_data.index import DataIndex, FileStorage\n\nfrom . import locked\n\nif TYPE_CHECKING:\n    from dvc.output import Output\n    from dvc.repo import Repo\n    from dvc.stage import Stage\n\nlogger = logger.getChild(__name__)\n\n\ndef _make_index_onerror(onerror, rev):\n    def _onerror(entry, exc):\n        if onerror:\n            return onerror(rev, entry, exc)\n        return None\n\n    return _onerror\n\n\ndef _collect_indexes(  # noqa: PLR0913\n    repo: \"Repo\",\n    targets=None,\n    remote=None,\n    all_branches=False,\n    with_deps=False,\n    all_tags=False,\n    recursive=False,\n    all_commits=False,\n    revs=None,\n    workspace=True,\n    max_size=None,\n    types=None,\n    config=None,\n    onerror=None,\n    push=False,\n):\n    from .index import index_from_targets\n\n    indexes = {}\n    collection_exc = None\n\n    config = config or {}\n    if remote:\n        core = config.get(\"core\") or {}\n        core[\"remote\"] = remote\n        config[\"core\"] = core\n\n    def stage_filter(stage: \"Stage\") -> bool:\n        return not (push and stage.is_repo_import)\n\n    def outs_filter(out: \"Output\") -> bool:\n        if push and not out.can_push:\n            return False\n        return not (remote and out.remote and remote != out.remote)\n\n    for rev in repo.brancher(\n        revs=revs,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n        workspace=workspace,\n    ):\n        try:\n            repo.config.merge(config)\n\n            idx = index_from_targets(\n                repo,\n                targets,\n                with_deps=with_deps,\n                recursive=recursive,\n                max_size=max_size,\n                types=types,\n                
stage_filter=stage_filter,\n                outs_filter=outs_filter,\n            )\n\n            idx.data[\"repo\"].onerror = _make_index_onerror(onerror, rev)\n\n            indexes[rev or \"workspace\"] = idx\n        except Exception as exc:  # noqa: BLE001\n            if onerror:\n                onerror(rev, None, exc)\n            collection_exc = exc\n            logger.warning(\"failed to collect '%s', skipping\", rev or \"workspace\")\n\n    if not indexes and collection_exc:\n        raise collection_exc\n\n    return indexes\n\n\n@locked\ndef fetch(  # noqa: PLR0913\n    self: \"Repo\",\n    targets=None,\n    jobs=None,\n    remote=None,\n    all_branches=False,\n    with_deps=False,\n    all_tags=False,\n    recursive=False,\n    all_commits=False,\n    run_cache=False,\n    revs=None,\n    workspace=True,\n    max_size=None,\n    types=None,\n    config=None,\n    onerror=None,\n) -> int:\n    \"\"\"Download data items from a cloud and imported repositories\n\n    Returns:\n        int: number of successfully downloaded files\n\n    Raises:\n        DownloadError: thrown when there are failed downloads, either\n            during `cloud.pull` or trying to fetch imported files\n\n        config.NoRemoteError: thrown when downloading only local files and no\n            remote is configured\n    \"\"\"\n    from fsspec.utils import tokenize\n\n    from dvc_data.index.fetch import collect\n    from dvc_data.index.fetch import fetch as ifetch\n\n    if isinstance(targets, str):\n        targets = [targets]\n\n    failed_count = 0\n    transferred_count = 0\n\n    try:\n        if run_cache:\n            self.stage_cache.pull(remote)\n    except RunCacheNotSupported as e:\n        logger.debug(\"failed to pull run cache: %s\", e)\n    except DownloadError as exc:\n        failed_count += exc.amount\n\n    indexes = _collect_indexes(\n        self,\n        targets=targets,\n        remote=remote,\n        all_branches=all_branches,\n        
with_deps=with_deps,\n        all_tags=all_tags,\n        recursive=recursive,\n        all_commits=all_commits,\n        revs=revs,\n        workspace=workspace,\n        max_size=max_size,\n        types=types,\n        config=config,\n        onerror=onerror,\n    )\n\n    cache_key = (\n        \"fetch\",\n        tokenize(sorted(idx.data_tree.hash_info.value for idx in indexes.values())),\n    )\n\n    with ui.progress(desc=\"Collecting\", unit=\"entry\", leave=True) as pb:\n        data = collect(\n            [idx.data[\"repo\"] for idx in indexes.values()],\n            \"remote\",\n            cache_index=self.data_index,\n            cache_key=cache_key,\n            callback=pb.as_callback(),\n        )\n    data, unversioned_count = _log_unversioned(data)\n    failed_count += unversioned_count\n\n    with ui.progress(\n        desc=\"Fetching\",\n        bar_format=\"{desc}\",\n        leave=True,\n    ) as pb:\n        try:\n            fetch_transferred, fetch_failed = ifetch(\n                data,\n                jobs=jobs,\n                callback=pb.as_callback(),\n            )\n        finally:\n            for fs_index in data:\n                fs_index.close()\n\n    if fetch_transferred:\n        # NOTE: dropping cached index to force reloading from newly saved cache\n        self.drop_data_index()\n\n    transferred_count += fetch_transferred\n    failed_count += fetch_failed\n    if failed_count:\n        raise DownloadError(failed_count)\n\n    return transferred_count\n\n\ndef _log_unversioned(data: list[\"DataIndex\"]) -> tuple[list[\"DataIndex\"], int]:\n    ret: list[DataIndex] = []\n    unversioned: list[str] = []\n    for fs_index in data:\n        remote = fs_index.storage_map[()].remote\n        if not isinstance(remote, FileStorage) or not remote.fs.version_aware:\n            ret.append(fs_index)\n            continue\n\n        fs = remote.fs\n        index = DataIndex()\n        index.storage_map = fs_index.storage_map\n      
  for key, entry in fs_index.iteritems():\n            if entry.meta and not entry.meta.isdir and entry.meta.version_id is None:\n                unversioned.append(fs.unstrip_protocol(fs.join(remote.path, *key)))\n            else:\n                index[key] = entry\n        fs_index.close()\n        ret.append(index)\n\n    if unversioned:\n        logger.warning(\n            (\n                \"Some files are missing cloud version information and will not be \"\n                \"fetched from the remote:\\n%s\"\n            ),\n            \"\\n\".join(unversioned),\n        )\n    return ret, len(unversioned)\n"
  },
  {
    "path": "dvc/repo/freeze.py",
    "content": "import typing\n\nfrom . import locked\n\nif typing.TYPE_CHECKING:\n    from . import Repo\n\n\n@locked\ndef _set(repo: \"Repo\", target, frozen):\n    stage = repo.stage.get_target(target)\n    stage.frozen = frozen\n    stage.dump(update_lock=False)\n\n    return stage\n\n\ndef freeze(repo, target):\n    return _set(repo, target, True)\n\n\ndef unfreeze(repo, target):\n    return _set(repo, target, False)\n"
  },
  {
    "path": "dvc/repo/gc.py",
    "content": "from typing import TYPE_CHECKING, Optional\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\n\nfrom . import locked\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.repo.index import ObjectContainer\n\nlogger = logger.getChild(__name__)\n\n\ndef _validate_args(**kwargs):\n    not_in_remote = kwargs.pop(\"not_in_remote\", None)\n    cloud = kwargs.pop(\"cloud\", None)\n    remote = kwargs.pop(\"remote\", None)\n    if remote and not (cloud or not_in_remote):\n        raise InvalidArgumentError(\"`--remote` requires `--cloud` or `--not-in-remote`\")\n    if not_in_remote and cloud:\n        raise InvalidArgumentError(\n            \"`--not-in-remote` and `--cloud` are mutually exclusive\"\n        )\n    if not any(kwargs.values()):\n        raise InvalidArgumentError(\n            \"Either of `-w|--workspace`, `-a|--all-branches`, `-T|--all-tags` \"\n            \"`--all-experiments`, `--all-commits`, `--date` or `--rev` \"\n            \"needs to be set.\"\n        )\n    if kwargs.get(\"num\") and not (kwargs.get(\"rev\") or kwargs.get(\"all_branches\")):\n        raise InvalidArgumentError(\n            \"`--num` can only be used alongside `--rev` or `--all-branches`\"\n        )\n\n\ndef _used_obj_ids_not_in_remote(\n    remote_odb_to_obj_ids: \"ObjectContainer\", jobs: Optional[int] = None\n):\n    used_obj_ids = set()\n    remote_oids = set()\n    for remote_odb, obj_ids in remote_odb_to_obj_ids.items():\n        assert remote_odb\n        remote_oids.update(\n            remote_odb.list_oids_exists(\n                {x.value for x in obj_ids if x.value},\n                jobs=jobs,\n            )\n        )\n        used_obj_ids.update(obj_ids)\n    return {obj for obj in used_obj_ids if obj.value not in remote_oids}\n\n\n@locked\ndef gc(  # noqa: C901, PLR0912, PLR0913\n    self: \"Repo\",\n    all_branches: bool = False,\n    cloud: bool = False,\n    remote: Optional[str] = None,\n    
with_deps: bool = False,\n    all_tags: bool = False,\n    all_commits: bool = False,\n    all_experiments: bool = False,\n    force: bool = False,\n    jobs: Optional[int] = None,\n    repos: Optional[list[str]] = None,\n    workspace: bool = False,\n    commit_date: Optional[str] = None,\n    rev: Optional[str] = None,\n    num: Optional[int] = None,\n    not_in_remote: bool = False,\n    dry: bool = False,\n    skip_failed: bool = False,\n):\n    # require `workspace` to be true to come into effect.\n    # assume `workspace` to be enabled if any of `all_tags`, `all_commits`,\n    # `all_experiments` or `all_branches` are enabled.\n    _validate_args(\n        workspace=workspace,\n        all_tags=all_tags,\n        all_commits=all_commits,\n        all_branches=all_branches,\n        all_experiments=all_experiments,\n        commit_date=commit_date,\n        rev=rev,\n        num=num,\n        cloud=cloud,\n        not_in_remote=not_in_remote,\n    )\n\n    from contextlib import ExitStack\n\n    from dvc.repo import Repo\n    from dvc_data.hashfile.db import get_index\n    from dvc_data.hashfile.gc import gc as ogc\n\n    if not repos:\n        repos = []\n    all_repos = [Repo(path) for path in repos]\n\n    odb_to_obj_ids: ObjectContainer = {}\n    with ExitStack() as stack:\n        for repo in all_repos:\n            stack.enter_context(repo.lock)\n\n        for repo in [*all_repos, self]:\n            for odb, obj_ids in repo.used_objs(\n                all_branches=all_branches,\n                with_deps=with_deps,\n                all_tags=all_tags,\n                all_commits=all_commits,\n                all_experiments=all_experiments,\n                commit_date=commit_date,\n                remote=remote,\n                force=force,\n                jobs=jobs,\n                revs=[rev] if rev else None,\n                num=num or 1,\n                skip_failed=skip_failed,\n            ).items():\n                if odb not in 
odb_to_obj_ids:\n                    odb_to_obj_ids[odb] = set()\n                odb_to_obj_ids[odb].update(obj_ids)\n\n    if cloud or not_in_remote:\n        _merge_remote_obj_ids(self, remote, odb_to_obj_ids)\n    if not_in_remote:\n        used_obj_ids = _used_obj_ids_not_in_remote(odb_to_obj_ids, jobs=jobs)\n    else:\n        used_obj_ids = set()\n        used_obj_ids.update(*odb_to_obj_ids.values())\n\n    seen_odbs = set()\n    for scheme, odb in self.cache.by_scheme():\n        if not odb or odb in seen_odbs:\n            continue\n        seen_odbs.add(odb)\n        num_removed = ogc(odb, used_obj_ids, jobs=jobs, dry=dry)\n        if num_removed:\n            logger.info(\"Removed %d objects from %s cache.\", num_removed, scheme)\n        else:\n            logger.info(\"No unused '%s' cache to remove.\", scheme)\n\n    if not cloud:\n        return\n\n    for remote_odb, obj_ids in odb_to_obj_ids.items():\n        assert remote_odb is not None\n        num_removed = ogc(remote_odb, obj_ids, jobs=jobs, dry=dry)\n        if num_removed:\n            get_index(remote_odb).clear()\n            logger.info(\"Removed %d objects from remote.\", num_removed)\n        else:\n            logger.info(\"No unused cache to remove from remote.\")\n\n\ndef _merge_remote_obj_ids(\n    repo: \"Repo\", remote: Optional[str], used_objs: \"ObjectContainer\"\n):\n    # Merge default remote used objects with remote-per-output used objects\n    default_obj_ids = used_objs.pop(None, set())\n    remote_odb = repo.cloud.get_remote_odb(remote, \"gc -c\", hash_name=\"md5\")\n    if remote_odb not in used_objs:\n        used_objs[remote_odb] = set()\n    used_objs[remote_odb].update(default_obj_ids)\n    legacy_odb = repo.cloud.get_remote_odb(remote, \"gc -c\", hash_name=\"md5-dos2unix\")\n    if legacy_odb not in used_objs:\n        used_objs[legacy_odb] = set()\n    used_objs[legacy_odb].update(default_obj_ids)\n"
  },
  {
    "path": "dvc/repo/get.py",
    "content": "import os\nfrom typing import TYPE_CHECKING, Union\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.utils import resolve_output\n\nif TYPE_CHECKING:\n    from dvc.fs.dvc import DVCFileSystem\n\n\nlogger = logger.getChild(__name__)\n\n\nclass GetDVCFileError(DvcException):\n    def __init__(self):\n        super().__init__(\n            \"the given path is a DVC file, you must specify a data file or a directory\"\n        )\n\n\ndef get(\n    url,\n    path,\n    out=None,\n    rev=None,\n    jobs=None,\n    force=False,\n    config=None,\n    remote=None,\n    remote_config=None,\n):\n    from dvc.config import Config\n    from dvc.dvcfile import is_valid_filename\n    from dvc.repo import Repo\n\n    out = resolve_output(path, out, force=force)\n\n    if is_valid_filename(out):\n        raise GetDVCFileError\n\n    if config and not isinstance(config, dict):\n        config = Config.load_file(config)\n\n    with Repo.open(\n        url=url,\n        rev=rev,\n        subrepos=True,\n        uninitialized=True,\n        config=config,\n        remote=remote,\n        remote_config=remote_config,\n    ) as repo:\n        from dvc.fs import download\n        from dvc.fs.data import DataFileSystem\n\n        fs: Union[DataFileSystem, DVCFileSystem]\n        if os.path.isabs(path):\n            fs = DataFileSystem(index=repo.index.data[\"local\"])\n            fs_path = fs.from_os_path(path)\n        else:\n            fs = repo.dvcfs\n            fs_path = fs.from_os_path(path)\n        download(fs, fs_path, os.path.abspath(out), jobs=jobs)\n"
  },
  {
    "path": "dvc/repo/get_url.py",
    "content": "import os\n\nfrom dvc import output\nfrom dvc.exceptions import URLMissingError\nfrom dvc.fs import download, parse_external_url\nfrom dvc.utils import resolve_output\n\n\ndef get_url(url, out=None, *, fs_config=None, jobs=None, force=False, config=None):\n    out = resolve_output(url, out, force=force)\n    out = os.path.abspath(out)\n    (out,) = output.loads_from(None, [out], use_cache=False)\n\n    src_fs, src_path = parse_external_url(url, fs_config, config=config)\n    if not src_fs.exists(src_path):\n        raise URLMissingError(url)\n    download(src_fs, src_path, out.fs_path, jobs=jobs)\n"
  },
  {
    "path": "dvc/repo/graph.py",
    "content": "from collections.abc import Iterator\nfrom typing import TYPE_CHECKING, Any, Optional, TypeVar\n\nfrom dvc.fs import localfs\nfrom dvc.utils.fs import path_isin\n\nif TYPE_CHECKING:\n    from networkx import DiGraph\n\n    from dvc.stage import Stage\n\nT = TypeVar(\"T\")\n\n\ndef check_acyclic(graph: \"DiGraph\") -> None:\n    import networkx as nx\n\n    from dvc.exceptions import CyclicGraphError\n\n    try:\n        edges = nx.find_cycle(graph, orientation=\"original\")\n    except nx.NetworkXNoCycle:\n        return\n\n    stages: set[Stage] = set()\n    for from_node, to_node, _ in edges:\n        stages.add(from_node)\n        stages.add(to_node)\n\n    raise CyclicGraphError(list(stages))\n\n\ndef get_pipeline(pipelines, node):\n    found = [i for i in pipelines if i.has_node(node)]\n    if not found:\n        return None\n\n    assert len(found) == 1\n    return found[0]\n\n\ndef get_pipelines(graph: \"DiGraph\"):\n    import networkx as nx\n\n    return [graph.subgraph(c).copy() for c in nx.weakly_connected_components(graph)]\n\n\ndef get_subgraph_of_nodes(\n    graph: \"DiGraph\", sources: Optional[list[Any]] = None, downstream: bool = False\n) -> \"DiGraph\":\n    from networkx import dfs_postorder_nodes, reverse_view\n\n    if not sources:\n        return graph\n\n    g = reverse_view(graph) if downstream else graph\n    nodes = []\n    for source in sources:\n        nodes.extend(dfs_postorder_nodes(g, source))\n    return graph.subgraph(nodes)\n\n\ndef collect_pipeline(stage: \"Stage\", graph: \"DiGraph\") -> Iterator[\"Stage\"]:\n    import networkx as nx\n\n    pipeline = get_pipeline(get_pipelines(graph), stage)\n    if not pipeline:\n        return iter([])\n\n    return nx.dfs_postorder_nodes(pipeline, stage)\n\n\ndef collect_inside_path(path: str, graph: \"DiGraph\") -> list[\"Stage\"]:\n    import networkx as nx\n\n    stages = nx.dfs_postorder_nodes(graph)\n    return [stage for stage in stages if path_isin(stage.path, 
path)]\n\n\ndef build_graph(stages, outs_trie=None):\n    \"\"\"Generate a graph by using the given stages on the given directory\n\n    The nodes of the graph are the stage's path relative to the root.\n\n    Edges are created when the output of one stage is used as a\n    dependency in other stage.\n\n    The direction of the edges goes from the stage to its dependency:\n\n    For example, running the following:\n\n        $ dvc run -o A \"echo A > A\"\n        $ dvc run -d A -o B \"echo B > B\"\n        $ dvc run -d B -o C \"echo C > C\"\n\n    Will create the following graph:\n\n           ancestors <--\n                       |\n            C.dvc -> B.dvc -> A.dvc\n            |          |\n            |          --> descendants\n            |\n            ------- pipeline ------>\n                       |\n                       v\n          (weakly connected components)\n\n    Args:\n        stages (list): used to build a graph from\n\n    Raises:\n        OutputDuplicationError: two outputs with the same path\n        StagePathAsOutputError: stage inside an output directory\n        OverlappingOutputPathsError: output inside output directory\n        CyclicGraphError: resulting graph has cycles\n    \"\"\"\n    import networkx as nx\n\n    from dvc.dependency import DatasetDependency\n    from dvc.exceptions import StagePathAsOutputError\n\n    from .trie import build_outs_trie\n\n    graph = nx.DiGraph()\n\n    # Use trie to efficiently find overlapping outs and deps\n    outs_trie = outs_trie or build_outs_trie(stages)\n\n    for stage in stages:\n        out = outs_trie.shortest_prefix(localfs.parts(stage.path)).value\n        if out:\n            raise StagePathAsOutputError(stage, str(out))\n\n    # Building graph\n    graph.add_nodes_from(stages)\n    for stage in stages:\n        if stage.is_repo_import:\n            continue\n        if stage.is_db_import:\n            continue\n\n        for dep in stage.deps:\n            if isinstance(dep, 
DatasetDependency):\n                continue\n            dep_key = dep.fs.parts(dep.fs_path)\n            overlapping = [n.value for n in outs_trie.prefixes(dep_key)]\n            if outs_trie.has_subtrie(dep_key):\n                overlapping.extend(outs_trie.values(prefix=dep_key))\n\n            graph.add_edges_from((stage, out.stage) for out in overlapping)\n    check_acyclic(graph)\n\n    return graph\n\n\n# NOTE: using stage graph instead of just list of stages to make sure that it\n# has already passed all the sanity checks like cycles/overlapping outputs and\n# so on.\ndef build_outs_graph(graph, outs_trie):\n    import networkx as nx\n\n    from dvc.dependency import DatasetDependency\n\n    outs_graph = nx.DiGraph()\n\n    outs_graph.add_nodes_from(outs_trie.values())\n    for stage in graph.nodes():\n        if stage.is_repo_import:\n            continue\n        if stage.is_db_import:\n            continue\n        for dep in stage.deps:\n            if isinstance(dep, DatasetDependency):\n                continue\n            dep_key = dep.fs.parts(dep.fs_path)\n            overlapping = [n.value for n in outs_trie.prefixes(dep_key)]\n            if outs_trie.has_subtrie(dep_key):\n                overlapping.extend(outs_trie.values(prefix=dep_key))\n\n            for from_out in stage.outs:\n                outs_graph.add_edges_from((from_out, out) for out in overlapping)\n    return outs_graph\n"
  },
  {
    "path": "dvc/repo/imp.py",
    "content": "def imp(\n    self,\n    url,\n    path,\n    out=None,\n    rev=None,\n    config=None,\n    remote=None,\n    remote_config=None,\n    **kwargs,\n):\n    erepo = {\"url\": url}\n    if rev is not None:\n        erepo[\"rev\"] = rev\n\n    if remote and remote_config and isinstance(config, str):\n        raise ValueError(\n            \"Can't specify config path together with both remote and remote_config\"\n        )\n\n    if config is not None:\n        erepo[\"config\"] = config\n\n    if remote is not None and remote_config is not None:\n        conf = erepo.get(\"config\") or {}\n\n        core = conf.get(\"core\") or {}\n        core[\"remote\"] = remote\n\n        remotes = conf.get(\"remote\") or {}\n        remote_conf = remotes.get(remote) or {}\n        remote_conf.update(remote_config)\n        remotes[remote] = remote_conf\n\n        conf[\"core\"] = core\n        conf[\"remote\"] = remotes\n\n        erepo[\"config\"] = conf\n    elif remote is not None:\n        erepo[\"remote\"] = remote\n    elif remote_config is not None:\n        erepo[\"remote\"] = remote_config\n\n    return self.imp_url(path, out=out, erepo=erepo, frozen=True, **kwargs)\n"
  },
  {
    "path": "dvc/repo/imp_db.py",
    "content": "from typing import TYPE_CHECKING, Optional\n\nfrom funcy import compact\n\nfrom dvc.exceptions import OutputDuplicationError\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.utils import resolve_output, resolve_paths\n\nif TYPE_CHECKING:\n    from . import Repo\n\nfrom . import locked\n\n\n@locked\n@scm_context\ndef imp_db(\n    self: \"Repo\",\n    sql: Optional[str] = None,\n    table: Optional[str] = None,\n    frozen: bool = True,\n    output_format: str = \"csv\",\n    out: Optional[str] = None,\n    force: bool = False,\n    connection: Optional[str] = None,\n):\n    assert sql or table\n    assert output_format in (\"csv\", \"json\")\n\n    db: dict[str, str] = compact(\n        {\n            \"connection\": connection,\n            \"file_format\": output_format,\n            \"query\": sql,\n            \"table\": table,\n        }\n    )\n\n    file_name = table or \"results\"\n    out = out or f\"{file_name}.{output_format}\"\n    out = resolve_output(\".\", out, force=force)\n\n    path, wdir, out = resolve_paths(self, out, always_local=True)\n    stage = self.stage.create(\n        single_stage=True,\n        validate=False,\n        fname=path,\n        deps=[None],\n        wdir=wdir,\n        outs=[out],\n        db=db,\n    )\n\n    try:\n        self.check_graph(stages={stage})\n    except OutputDuplicationError as exc:\n        raise OutputDuplicationError(  # noqa: B904\n            exc.output, set(exc.stages) - {stage}\n        )\n\n    stage.run()\n    stage.frozen = frozen\n    stage.dump()\n    return stage\n"
  },
  {
    "path": "dvc/repo/imp_url.py",
    "content": "import os\nfrom typing import TYPE_CHECKING\n\nfrom dvc.exceptions import InvalidArgumentError, OutputDuplicationError\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.utils import relpath, resolve_output, resolve_paths\nfrom dvc.utils.fs import path_isin\n\nif TYPE_CHECKING:\n    from . import Repo\n\nfrom . import locked\n\n\n@locked\n@scm_context\ndef imp_url(  # noqa: PLR0913\n    self: \"Repo\",\n    url,\n    out=None,\n    erepo=None,\n    frozen=True,\n    no_download=False,\n    no_exec=False,\n    remote=None,\n    to_remote=False,\n    jobs=None,\n    force=False,\n    fs_config=None,\n    version_aware: bool = False,\n):\n    out = resolve_output(url, out, force=force)\n    path, wdir, out = resolve_paths(self, out, always_local=to_remote and not out)\n\n    if to_remote and (no_exec or no_download or version_aware):\n        raise InvalidArgumentError(\n            \"--no-exec/--no-download/--version-aware cannot be combined with \"\n            \"--to-remote\"\n        )\n\n    if not to_remote and remote:\n        raise InvalidArgumentError(\"--remote can't be used without --to-remote\")\n\n    # NOTE: when user is importing something from within their own repository\n    if (\n        erepo is None\n        and os.path.exists(url)\n        and path_isin(os.path.abspath(url), self.root_dir)\n    ):\n        url = relpath(url, wdir)\n\n    if version_aware:\n        if fs_config is None:\n            fs_config = {}\n        fs_config[\"version_aware\"] = True\n\n    stage = self.stage.create(\n        single_stage=True,\n        validate=False,\n        fname=path,\n        wdir=wdir,\n        deps=[url],\n        outs=[out],\n        erepo=erepo,\n        fs_config=fs_config,\n    )\n\n    try:\n        self.check_graph(stages={stage})\n    except OutputDuplicationError as exc:\n        raise OutputDuplicationError(  # noqa: B904\n            exc.output, set(exc.stages) - {stage}\n        )\n\n    if no_exec:\n        
stage.ignore_outs()\n    elif to_remote:\n        remote_odb = self.cloud.get_remote_odb(remote, \"import-url\")\n        stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)\n        stage.outs[0].ignore()\n        stage.save_deps()\n        stage.md5 = stage.compute_md5()\n    else:\n        if stage.deps[0].fs.version_aware:\n            stage.outs[0].can_push = False\n        stage.run(jobs=jobs, no_download=no_download)\n\n    stage.frozen = frozen\n    stage.dump()\n    return stage\n"
  },
  {
    "path": "dvc/repo/index.py",
    "content": "import logging\nimport time\nfrom collections import defaultdict\nfrom collections.abc import Iterable, Iterator\nfrom functools import partial\nfrom itertools import chain\nfrom typing import TYPE_CHECKING, Any, Callable, NamedTuple, Optional, Union\n\nfrom funcy.debug import format_time\n\nfrom dvc.dependency import ParamsDependency\nfrom dvc.fs import LocalFileSystem\nfrom dvc.fs.callbacks import DEFAULT_CALLBACK\nfrom dvc.log import logger\nfrom dvc.utils.objects import cached_property\n\nif TYPE_CHECKING:\n    from networkx import DiGraph\n    from pygtrie import Trie\n    from typing_extensions import Self\n\n    from dvc.dependency import Dependency\n    from dvc.fs.callbacks import Callback\n    from dvc.output import Output\n    from dvc.repo import Repo\n    from dvc.repo.stage import StageInfo\n    from dvc.stage import Stage\n    from dvc.types import TargetType\n    from dvc_data.hashfile.db import HashFileDB\n    from dvc_data.hashfile.hash_info import HashInfo\n    from dvc_data.index import DataIndex, DataIndexKey, DataIndexView\n    from dvc_objects.fs.base import FileSystem\n\n\nlogger = logger.getChild(__name__)\nObjectContainer = dict[Optional[\"HashFileDB\"], set[\"HashInfo\"]]\n\n\ndef log_walk(seq):\n    for root, dirs, files in seq:\n        start = time.perf_counter()\n        yield root, dirs, files\n        duration = format_time(time.perf_counter() - start)\n        logger.trace(\"%s in collecting stages from %s\", duration, root)\n\n\ndef collect_files(\n    repo: \"Repo\", onerror: Optional[Callable[[str, Exception], None]] = None\n):\n    \"\"\"Collects all of the stages present in the DVC repo.\n\n    Args:\n        onerror (optional): callable that will be called with two args:\n            the filepath whose collection failed and the exc instance.\n            It can report the error to continue with the collection\n            (and, skip failed ones), or raise the exception to abort\n            the collection.\n   
 \"\"\"\n    from dvc.dvcfile import is_valid_filename\n    from dvc.exceptions import DvcException\n    from dvc.utils import relpath\n\n    scm = repo.scm\n    fs = repo.fs\n    sep = fs.sep\n    outs: set[str] = set()\n\n    is_local_fs = isinstance(fs, LocalFileSystem)\n\n    def is_ignored(path: str) -> bool:\n        # apply only for the local fs\n        return is_local_fs and scm.is_ignored(path)\n\n    def is_dvcfile_and_not_ignored(root: str, file: str) -> bool:\n        return is_valid_filename(file) and not is_ignored(f\"{root}{sep}{file}\")\n\n    def is_out_or_ignored(root: str, directory: str) -> bool:\n        dir_path = f\"{root}{sep}{directory}\"\n        # trailing slash needed to check if a directory is gitignored\n        return dir_path in outs or is_ignored(f\"{dir_path}{sep}\")\n\n    walk_iter = repo.dvcignore.walk(fs, repo.root_dir, followlinks=False)\n    if logger.isEnabledFor(logging.TRACE):  # type: ignore[attr-defined]\n        walk_iter = log_walk(walk_iter)\n\n    for root, dirs, files in walk_iter:\n        assert isinstance(dirs, list)\n        assert isinstance(files, list)\n        dvcfile_filter = partial(is_dvcfile_and_not_ignored, root)\n        for file in filter(dvcfile_filter, files):\n            file_path = fs.join(root, file)\n            try:\n                index = Index.from_file(repo, file_path)\n            except DvcException as exc:\n                if onerror:\n                    onerror(relpath(file_path), exc)\n                    continue\n                raise\n\n            outs.update(\n                out.fspath\n                for stage in index.stages\n                for out in stage.outs\n                if out.protocol == \"local\"\n            )\n            yield file_path, index\n        dirs[:] = [d for d in dirs if not is_out_or_ignored(root, d)]\n\n\ndef _load_data_from_tree(index, prefix, ws, key, tree, hash_name):\n    from dvc_data.index import DataIndexEntry, Meta\n\n    parents = 
set()\n\n    for okey, ometa, ohi in tree:\n        for key_len in range(1, len(okey)):\n            parents.add((*key, *okey[:key_len]))\n\n        fkey = (*key, *okey)\n        index[(*prefix, ws, *fkey)] = DataIndexEntry(\n            key=fkey,\n            meta=ometa,\n            hash_info=ohi if (ohi and ohi.name == hash_name) else None,\n        )\n\n    for parent in parents:\n        index[(*prefix, ws, *parent)] = DataIndexEntry(\n            key=parent, meta=Meta(isdir=True), loaded=True\n        )\n\n\ndef _load_data_from_outs(index, prefix, outs):\n    from dvc_data.index import DataIndexEntry, Meta\n\n    parents = set()\n    for out in outs:\n        if not out.use_cache:\n            continue\n\n        ws, key = out.index_key\n\n        for key_len in range(1, len(key)):\n            parents.add((ws, key[:key_len]))\n\n        tree = None\n        if (\n            out.stage.is_import\n            and not out.stage.is_repo_import\n            and not out.stage.is_db_import\n            and out.stage.deps[0].files\n        ):\n            tree = out.stage.deps[0].get_obj()\n        elif out.files:\n            tree = out.get_obj()\n\n        if tree is not None:\n            _load_data_from_tree(index, prefix, ws, key, tree, out.hash_name)\n\n        entry = DataIndexEntry(\n            key=key,\n            meta=out.meta,\n            hash_info=out.hash_info,\n            loaded=None if tree is None else True,\n        )\n\n        if (\n            out.stage.is_import\n            and not out.stage.is_repo_import\n            and not out.stage.is_db_import\n        ):\n            dep = out.stage.deps[0]\n            entry.meta = dep.meta\n            if out.hash_info:\n                entry.hash_info = out.hash_info\n            else:\n                # partial import\n                entry.hash_info = dep.hash_info\n\n        # FIXME PyGTrie-based DataIndex doesn't remove entry.key during\n        # index.add, so we have to set the entry 
manually here to make\n        # index.view() work correctly.\n        index[(*prefix, ws, *key)] = entry\n\n    for ws, key in parents:\n        index[(*prefix, ws, *key)] = DataIndexEntry(\n            key=key, meta=Meta(isdir=True), loaded=True\n        )\n\n\ndef _load_storage_from_import(storage_map, key, out):\n    from fsspec.utils import tokenize\n\n    from dvc_data.index import FileStorage\n\n    if out.stage.is_db_import:\n        return\n\n    dep = out.stage.deps[0]\n    if not out.hash_info or dep.fs.version_aware:\n        if dep.meta and dep.meta.isdir:\n            meta_token = dep.hash_info.value\n        else:\n            meta_token = tokenize(dep.meta.to_dict())\n\n        fs_cache = out.repo.cache.fs_cache\n        storage_map.add_cache(\n            FileStorage(\n                key,\n                fs_cache.fs,\n                fs_cache.fs.join(\n                    fs_cache.path,\n                    dep.fs.protocol,\n                    tokenize(dep.fs_path, meta_token),\n                ),\n            )\n        )\n\n    if out.stage.is_repo_import or not out.hash_info or dep.fs.version_aware:\n        storage_map.add_remote(FileStorage(key, dep.fs, dep.fs_path, read_only=True))\n\n\ndef _load_storage_from_out(storage_map, key, out):\n    from dvc.cachemgr import LEGACY_HASH_NAMES\n    from dvc.config import NoRemoteError\n    from dvc_data.index import FileStorage, ObjectStorage\n\n    if out.cache:\n        storage_map.add_cache(ObjectStorage(key, out.cache))\n\n    try:\n        remote = out.repo.cloud.get_remote(out.remote)\n        if remote.fs.version_aware:\n            storage_map.add_remote(\n                FileStorage(\n                    key=key,\n                    fs=remote.fs,\n                    path=remote.path,\n                    index=remote.index,\n                    prefix=(),\n                    read_only=(not out.can_push),\n                )\n            )\n        else:\n            odb = (\n              
  remote.legacy_odb if out.hash_name in LEGACY_HASH_NAMES else remote.odb\n            )\n            storage_map.add_remote(\n                ObjectStorage(\n                    key, odb, index=remote.index, read_only=(not out.can_push)\n                )\n            )\n    except NoRemoteError:\n        pass\n\n    if out.stage.is_import:\n        _load_storage_from_import(storage_map, key, out)\n\n\ndef _build_tree_from_outs(outs):\n    from dvc_data.hashfile.tree import Tree\n\n    tree = Tree()\n    for out in outs:\n        if not out.use_cache:\n            continue\n\n        ws, key = out.index_key\n\n        if not out.stage.is_partial_import:\n            tree.add((ws, *key), out.meta, out.hash_info)\n            continue\n\n        dep = out.stage.deps[0]\n        if not dep.files:\n            tree.add((ws, *key), dep.meta, dep.hash_info)\n            continue\n\n        for okey, ometa, ohi in dep.get_obj():\n            tree.add((ws, *key, *okey), ometa, ohi)\n\n    tree.digest()\n\n    return tree\n\n\nclass Index:\n    def __init__(\n        self,\n        repo: \"Repo\",\n        stages: Optional[list[\"Stage\"]] = None,\n        metrics: Optional[dict[str, list[str]]] = None,\n        plots: Optional[dict[str, list[str]]] = None,\n        params: Optional[dict[str, Any]] = None,\n        artifacts: Optional[dict[str, Any]] = None,\n        datasets: Optional[dict[str, list[dict[str, Any]]]] = None,\n        datasets_lock: Optional[dict[str, list[dict[str, Any]]]] = None,\n    ) -> None:\n        self.repo = repo\n        self.stages = stages or []\n        self._metrics = metrics or {}\n        self._plots = plots or {}\n        self._params = params or {}\n        self._artifacts = artifacts or {}\n        self._datasets: dict[str, list[dict[str, Any]]] = datasets or {}\n        self._datasets_lock: dict[str, list[dict[str, Any]]] = datasets_lock or {}\n        self._collected_targets: dict[int, list[StageInfo]] = {}\n\n    @cached_property\n   
 def rev(self) -> Optional[str]:\n        if not isinstance(self.repo.fs, LocalFileSystem):\n            return self.repo.get_rev()[:7]\n        return None\n\n    def __repr__(self) -> str:\n        rev = self.rev or \"workspace\"\n        return f\"Index({self.repo}, fs@{rev})\"\n\n    @classmethod\n    def from_repo(\n        cls,\n        repo: \"Repo\",\n        onerror: Optional[Callable[[str, Exception], None]] = None,\n    ) -> \"Index\":\n        onerror = onerror or repo.stage_collection_error_handler\n        return cls.from_indexes(\n            repo, (idx for _, idx in collect_files(repo, onerror=onerror))\n        )\n\n    @classmethod\n    def from_file(cls, repo: \"Repo\", path: str) -> \"Index\":\n        from dvc.dvcfile import load_file\n\n        dvcfile = load_file(repo, path)\n        return cls(\n            repo,\n            stages=list(dvcfile.stages.values()),\n            metrics={path: dvcfile.metrics} if dvcfile.metrics else {},\n            plots={path: dvcfile.plots} if dvcfile.plots else {},\n            params={path: dvcfile.params} if dvcfile.params else {},\n            artifacts={path: dvcfile.artifacts} if dvcfile.artifacts else {},\n            datasets={path: dvcfile.datasets} if dvcfile.datasets else {},\n            datasets_lock={path: dvcfile.datasets_lock}\n            if dvcfile.datasets_lock\n            else {},\n        )\n\n    def update(self, stages: Iterable[\"Stage\"]) -> \"Self\":\n        stages = set(stages)\n        # we remove existing stages with same hashes at first\n        # and then re-add the new ones later.\n        stages_set = (set(self.stages) - stages) | stages\n        return self.__class__(\n            self.repo,\n            stages=list(stages_set),\n            metrics=self._metrics,\n            plots=self._plots,\n            params=self._params,\n            artifacts=self._artifacts,\n            datasets=self._datasets,\n        )\n\n    @classmethod\n    def from_indexes(cls, repo, 
idxs: Iterable[\"Self\"]) -> \"Self\":\n        stages = []\n        metrics = {}\n        plots = {}\n        params = {}\n        artifacts = {}\n        datasets = {}\n        datasets_lock = {}\n\n        for idx in idxs:\n            stages.extend(idx.stages)\n            metrics.update(idx._metrics)\n            plots.update(idx._plots)\n            params.update(idx._params)\n            artifacts.update(idx._artifacts)\n            datasets.update(idx._datasets)\n            datasets_lock.update(idx._datasets_lock)\n\n        return cls(\n            repo,\n            stages=stages,\n            metrics=metrics,\n            plots=plots,\n            params=params,\n            artifacts=artifacts,\n            datasets=datasets,\n            datasets_lock=datasets_lock,\n        )\n\n    @cached_property\n    def outs_trie(self) -> \"Trie\":\n        from dvc.repo.trie import build_outs_trie\n\n        return build_outs_trie(self.stages)\n\n    @cached_property\n    def outs_graph(self) -> \"DiGraph\":\n        from dvc.repo.graph import build_outs_graph\n\n        return build_outs_graph(self.graph, self.outs_trie)\n\n    @cached_property\n    def graph(self) -> \"DiGraph\":\n        from dvc.repo.graph import build_graph\n\n        return build_graph(self.stages, self.outs_trie)\n\n    def check_graph(self) -> None:\n        if not getattr(self.repo, \"_skip_graph_checks\", False):\n            self.graph  # noqa: B018\n\n    @property\n    def params(self) -> Iterator[\"ParamsDependency\"]:\n        from dvc.dependency import ParamsDependency\n\n        for dep in self.deps:\n            if isinstance(dep, ParamsDependency):\n                yield dep\n\n    @property\n    def outs(self) -> Iterator[\"Output\"]:\n        for stage in self.stages:\n            yield from stage.outs\n\n    @cached_property\n    def out_data_keys(self) -> dict[str, set[\"DataIndexKey\"]]:\n        by_workspace: dict[str, set[DataIndexKey]] = defaultdict(set)\n\n        
by_workspace[\"repo\"] = set()\n        by_workspace[\"local\"] = set()\n\n        for out in self.outs:\n            if not out.use_cache:\n                continue\n\n            ws, key = out.index_key\n            by_workspace[ws].add(key)\n\n        return dict(by_workspace)\n\n    @property\n    def decorated_outs(self) -> Iterator[\"Output\"]:\n        for output in self.outs:\n            if output.is_decorated:\n                yield output\n\n    @property\n    def metrics(self) -> Iterator[\"Output\"]:\n        for output in self.outs:\n            if output.is_metric:\n                yield output\n\n    @property\n    def plots(self) -> Iterator[\"Output\"]:\n        for output in self.outs:\n            if output.is_plot:\n                yield output\n\n    @property\n    def deps(self) -> Iterator[\"Dependency\"]:\n        for stage in self.stages:\n            yield from stage.deps\n\n    @cached_property\n    def _plot_sources(self) -> list[str]:\n        from dvc.repo.plots import _collect_pipeline_files\n\n        sources: list[str] = []\n        for data in _collect_pipeline_files(self.repo, [], {}).values():\n            for plot_id, props in data.get(\"data\", {}).items():\n                if isinstance(props.get(\"y\"), dict):\n                    sources.extend(props[\"y\"])\n                    if isinstance(props.get(\"x\"), dict):\n                        sources.extend(props[\"x\"])\n                else:\n                    sources.append(plot_id)\n        return sources\n\n    @cached_property\n    def data_keys(self) -> dict[str, set[\"DataIndexKey\"]]:\n        by_workspace: dict[str, set[DataIndexKey]] = defaultdict(set)\n\n        by_workspace[\"repo\"] = set()\n        by_workspace[\"local\"] = set()\n\n        for out in self.outs:\n            if not out.use_cache:\n                continue\n\n            workspace, key = out.index_key\n            by_workspace[workspace].add(key)\n\n        return dict(by_workspace)\n\n    
@cached_property\n    def metric_keys(self) -> dict[str, set[\"DataIndexKey\"]]:\n        from .metrics.show import _collect_top_level_metrics\n\n        by_workspace: dict[str, set[DataIndexKey]] = defaultdict(set)\n\n        by_workspace[\"repo\"] = set()\n\n        for out in self.outs:\n            if not out.metric:\n                continue\n\n            workspace, key = out.index_key\n            by_workspace[workspace].add(key)\n\n        for path in _collect_top_level_metrics(self.repo):\n            key = self.repo.fs.relparts(path, self.repo.root_dir)\n            by_workspace[\"repo\"].add(key)\n\n        return dict(by_workspace)\n\n    @cached_property\n    def param_keys(self) -> dict[str, set[\"DataIndexKey\"]]:\n        from .params.show import _collect_top_level_params\n\n        by_workspace: dict[str, set[DataIndexKey]] = defaultdict(set)\n        by_workspace[\"repo\"] = set()\n\n        param_paths = _collect_top_level_params(self.repo)\n        default_file: str = ParamsDependency.DEFAULT_PARAMS_FILE\n        if self.repo.fs.exists(f\"{self.repo.fs.root_marker}{default_file}\"):\n            param_paths = chain(param_paths, [default_file])\n\n        for path in param_paths:\n            key = self.repo.fs.relparts(path, self.repo.root_dir)\n            by_workspace[\"repo\"].add(key)\n\n        return dict(by_workspace)\n\n    @cached_property\n    def plot_keys(self) -> dict[str, set[\"DataIndexKey\"]]:\n        by_workspace: dict[str, set[DataIndexKey]] = defaultdict(set)\n\n        by_workspace[\"repo\"] = set()\n\n        for out in self.outs:\n            if not out.plot:\n                continue\n\n            workspace, key = out.index_key\n            by_workspace[workspace].add(key)\n\n        for path in self._plot_sources:\n            key = self.repo.fs.parts(path)\n            by_workspace[\"repo\"].add(key)\n\n        return dict(by_workspace)\n\n    @cached_property\n    def data_tree(self):\n        return 
_build_tree_from_outs(self.outs)\n\n    @cached_property\n    def data(self) -> \"dict[str, DataIndex]\":\n        prefix: DataIndexKey\n        loaded = False\n\n        index = self.repo.data_index\n        prefix = (\"tree\", self.data_tree.hash_info.value)\n        if index.has_node(prefix):\n            loaded = True\n\n        if not loaded:\n            _load_data_from_outs(index, prefix, self.outs)\n            index.commit()\n\n        by_workspace = {}\n        by_workspace[\"repo\"] = index.view((*prefix, \"repo\"))\n        by_workspace[\"local\"] = index.view((*prefix, \"local\"))\n\n        for out in self.outs:\n            if not out.use_cache:\n                continue\n\n            if not out.is_in_repo:\n                continue\n\n            ws, key = out.index_key\n            if ws not in by_workspace:\n                by_workspace[ws] = index.view((*prefix, ws))\n\n            data_index = by_workspace[ws]\n            _load_storage_from_out(data_index.storage_map, key, out)\n\n        return by_workspace\n\n    @staticmethod\n    def _hash_targets(targets: Iterable[Optional[str]], **kwargs: Any) -> int:\n        return hash(\n            (\n                frozenset(targets),\n                kwargs.get(\"with_deps\", False),\n                kwargs.get(\"recursive\", False),\n            )\n        )\n\n    def collect_targets(\n        self, targets: Optional[\"TargetType\"], *, onerror=None, **kwargs: Any\n    ) -> list[\"StageInfo\"]:\n        from dvc.exceptions import DvcException\n        from dvc.repo.stage import StageInfo\n        from dvc.utils.collections import ensure_list\n\n        if not onerror:\n\n            def onerror(_target, _exc):\n                raise  # noqa: PLE0704\n\n        targets = ensure_list(targets)\n        if not targets:\n            return [StageInfo(stage) for stage in self.stages]\n        targets_hash = self._hash_targets(targets, **kwargs)\n        if targets_hash not in 
self._collected_targets:\n            collected = []\n            for target in targets:\n                try:\n                    collected.extend(self.repo.stage.collect_granular(target, **kwargs))\n                except DvcException as exc:\n                    onerror(target, exc)\n            self._collected_targets[targets_hash] = collected\n\n        return self._collected_targets[targets_hash]\n\n    def used_objs(\n        self,\n        targets: Optional[\"TargetType\"] = None,\n        with_deps: bool = False,\n        remote: Optional[str] = None,\n        force: bool = False,\n        recursive: bool = False,\n        jobs: Optional[int] = None,\n        push: bool = False,\n    ) -> \"ObjectContainer\":\n        used: ObjectContainer = defaultdict(set)\n        pairs = self.collect_targets(targets, recursive=recursive, with_deps=with_deps)\n        for stage, filter_info in pairs:\n            for odb, objs in stage.get_used_objs(\n                remote=remote,\n                force=force,\n                jobs=jobs,\n                filter_info=filter_info,\n                push=push,\n            ).items():\n                used[odb].update(objs)\n        return used\n\n    def _types_filter(self, types, out):\n        ws, okey = out.index_key\n        for typ in types:\n            if typ == \"plots\":\n                keys = self.plot_keys\n            elif typ == \"metrics\":\n                keys = self.metric_keys\n            elif typ == \"params\":\n                keys = self.param_keys\n            else:\n                raise ValueError(f\"unsupported type {typ}\")\n\n            for key in keys.get(ws, []):\n                if (len(key) >= len(okey) and key[: len(okey)] == okey) or (\n                    len(key) < len(okey) and okey[: len(key)] == key\n                ):\n                    return True\n\n        return False\n\n    def targets_view(\n        self,\n        targets: Optional[\"TargetType\"],\n        stage_filter: 
Optional[Callable[[\"Stage\"], bool]] = None,\n        outs_filter: Optional[Callable[[\"Output\"], bool]] = None,\n        max_size: Optional[int] = None,\n        types: Optional[list[str]] = None,\n        **kwargs: Any,\n    ) -> \"IndexView\":\n        \"\"\"Return read-only view of index for the specified targets.\n        Args:\n            targets: Targets to collect\n            stage_filter: Optional stage filter to be applied after collecting\n                targets.\n            outs_filter: Optional output filter to be applied after collecting\n                targets.\n        Additional kwargs will be passed into the stage collector.\n        Note:\n            If both stage_filter and outs_filter are provided, stage_filter\n            will be applied first, and the resulting view will only contain\n            outputs from stages that matched stage_filter. Outputs from stages\n            that did not match will be excluded from the view (whether or not\n            the output would have matched outs_filter).\n        \"\"\"\n        stage_infos = [\n            stage_info\n            for stage_info in self.collect_targets(targets, **kwargs)\n            if not stage_filter or stage_filter(stage_info.stage)\n        ]\n\n        def _outs_filter(out):\n            if max_size and out.meta and out.meta.size and out.meta.size >= max_size:\n                return False\n\n            if types and not self._types_filter(types, out):\n                return False\n\n            if outs_filter:\n                return outs_filter(out)\n\n            return True\n\n        return IndexView(self, stage_infos, outs_filter=_outs_filter)\n\n\nclass _DataPrefixes(NamedTuple):\n    explicit: set[\"DataIndexKey\"]\n    recursive: set[\"DataIndexKey\"]\n\n\nclass IndexView:\n    \"\"\"Read-only view of Index.data using filtered stages.\"\"\"\n\n    def __init__(\n        self,\n        index: Index,\n        stage_infos: Iterable[\"StageInfo\"],\n        
outs_filter: Optional[Callable[[\"Output\"], bool]],\n    ):\n        self._index = index\n        self._stage_infos = stage_infos\n        # NOTE: stage_infos might have the same stage multiple times but with\n        # different filter_info\n        self.stages = list({stage for stage, _ in stage_infos})\n        self._outs_filter = outs_filter\n\n    @property\n    def repo(self) -> \"Repo\":\n        return self._index.repo\n\n    @property\n    def deps(self) -> Iterator[\"Dependency\"]:\n        for stage in self.stages:\n            yield from stage.deps\n\n    @property\n    def index(self) -> \"Index\":\n        return self._index\n\n    @property\n    def _filtered_outs(self) -> Iterator[tuple[\"Output\", Optional[str]]]:\n        for stage, filter_info in self._stage_infos:\n            for out in stage.filter_outs(filter_info):\n                if not self._outs_filter or self._outs_filter(out):\n                    yield out, filter_info\n\n    @property\n    def outs(self) -> Iterator[\"Output\"]:\n        yield from {out for (out, _) in self._filtered_outs}\n\n    @cached_property\n    def out_data_keys(self) -> dict[str, set[\"DataIndexKey\"]]:\n        by_workspace: dict[str, set[DataIndexKey]] = defaultdict(set)\n\n        by_workspace[\"repo\"] = set()\n        by_workspace[\"local\"] = set()\n\n        for out in self.outs:\n            if not out.use_cache:\n                continue\n\n            ws, key = out.index_key\n            by_workspace[ws].add(key)\n\n        return dict(by_workspace)\n\n    @cached_property\n    def _data_prefixes(self) -> dict[str, \"_DataPrefixes\"]:\n        prefixes: dict[str, _DataPrefixes] = defaultdict(\n            lambda: _DataPrefixes(set(), set())\n        )\n        for out, filter_info in self._filtered_outs:\n            if not out.use_cache:\n                continue\n            workspace, key = out.index_key\n            if filter_info and out.fs.isin(filter_info, out.fs_path):\n                key 
= key + out.fs.relparts(filter_info, out.fs_path)\n            entry = self._index.data[workspace].get(key)\n            if entry and entry.meta and entry.meta.isdir:\n                prefixes[workspace].recursive.add(key)\n            prefixes[workspace].explicit.update(key[:i] for i in range(len(key), 0, -1))\n        return prefixes\n\n    @cached_property\n    def data_keys(self) -> dict[str, set[\"DataIndexKey\"]]:\n        ret: dict[str, set[DataIndexKey]] = defaultdict(set)\n\n        for out, filter_info in self._filtered_outs:\n            if not out.use_cache:\n                continue\n\n            workspace, key = out.index_key\n            if filter_info and out.fs.isin(filter_info, out.fs_path):\n                key = key + out.fs.relparts(filter_info, out.fs_path)\n            ret[workspace].add(key)\n\n        return dict(ret)\n\n    @cached_property\n    def data_tree(self):\n        return _build_tree_from_outs(self.outs)\n\n    @cached_property\n    def data(self) -> dict[str, Union[\"DataIndex\", \"DataIndexView\"]]:\n        from dvc_data.index import DataIndex, view\n\n        def key_filter(workspace: str, key: \"DataIndexKey\"):\n            try:\n                prefixes = self._data_prefixes[workspace]\n                return key in prefixes.explicit or any(\n                    key[: len(prefix)] == prefix for prefix in prefixes.recursive\n                )\n            except KeyError:\n                return False\n\n        data: dict[str, Union[DataIndex, DataIndexView]] = {}\n        for workspace, data_index in self._index.data.items():\n            if self.stages:\n                data[workspace] = view(data_index, partial(key_filter, workspace))\n            else:\n                data[workspace] = DataIndex()\n        return data\n\n\ndef build_data_index(  # noqa: C901, PLR0912\n    index: Union[\"Index\", \"IndexView\"],\n    path: str,\n    fs: \"FileSystem\",\n    workspace: str = \"repo\",\n    compute_hash: Optional[bool] 
= False,\n    callback: \"Callback\" = DEFAULT_CALLBACK,\n) -> \"DataIndex\":\n    from dvc_data.index import DataIndex, DataIndexEntry, Meta\n    from dvc_data.index.build import build_entries, build_entry\n    from dvc_data.index.save import build_tree\n\n    ignore = None\n    if workspace == \"repo\" and isinstance(fs, LocalFileSystem):\n        ignore = index.repo.dvcignore\n\n    data = DataIndex()\n    parents = set()\n    for key in index.data_keys.get(workspace, set()):\n        out_path = fs.join(path, *key)\n\n        for key_len in range(1, len(key)):\n            parents.add(key[:key_len])\n\n        if not fs.exists(out_path):\n            continue\n\n        hash_name = _get_entry_hash_name(index, workspace, key)\n        try:\n            out_entry = build_entry(\n                out_path,\n                fs,\n                compute_hash=compute_hash,\n                state=index.repo.state,\n                hash_name=hash_name,\n            )\n        except FileNotFoundError:\n            out_entry = DataIndexEntry()\n\n        out_entry.key = key\n        data.add(out_entry)\n        callback.relative_update(1)\n\n        if not out_entry.meta or not out_entry.meta.isdir:\n            continue\n\n        for entry in build_entries(\n            out_path,\n            fs,\n            compute_hash=compute_hash,\n            state=index.repo.state,\n            ignore=ignore,\n            hash_name=hash_name,\n        ):\n            if not entry.key or entry.key == (\"\",):\n                # NOTE: whether the root will be returned by build_entries\n                # depends on the filesystem (e.g. 
local doesn't, but s3 does).\n                continue\n\n            entry.key = key + entry.key\n            data.add(entry)\n            callback.relative_update(1)\n\n    for key in parents:\n        parent_path = fs.join(path, *key)\n        if not fs.exists(parent_path):\n            continue\n        direntry = DataIndexEntry(key=key, meta=Meta(isdir=True), loaded=True)\n        data.add(direntry)\n        callback.relative_update(1)\n\n    if compute_hash:\n        out_keys = index.out_data_keys.get(workspace, set())\n        data_keys = index.data_keys.get(workspace, set())\n        for key in data_keys.intersection(out_keys):\n            hash_name = _get_entry_hash_name(index, workspace, key)\n\n            out_entry = data.get(key)\n            if not out_entry or not out_entry.isdir:\n                continue\n\n            tree_meta, tree = build_tree(data, key, name=hash_name)\n            out_entry.meta = tree_meta\n            out_entry.hash_info = tree.hash_info\n            out_entry.loaded = True\n            data.add(out_entry)\n            callback.relative_update(1)\n\n    return data\n\n\ndef _get_entry_hash_name(\n    index: Union[\"Index\", \"IndexView\"], workspace: str, key: \"DataIndexKey\"\n) -> str:\n    from dvc_data.hashfile.hash import DEFAULT_ALGORITHM\n\n    for idx in reversed(range(len(key) + 1)):\n        prefix = key[:idx]\n        try:\n            src_entry = index.data[workspace][prefix]\n        except KeyError:\n            continue\n\n        if src_entry.hash_info and src_entry.hash_info.name:\n            return src_entry.hash_info.name\n\n    return DEFAULT_ALGORITHM\n\n\ndef index_from_targets(\n    repo: \"Repo\",\n    targets: Optional[\"TargetType\"] = None,\n    stage_filter: Optional[Callable[[\"Stage\"], bool]] = None,\n    outs_filter: Optional[Callable[[\"Output\"], bool]] = None,\n    max_size: Optional[int] = None,\n    types: Optional[list[str]] = None,\n    with_deps: bool = False,\n    recursive: bool = 
False,\n    **kwargs: Any,\n) -> \"IndexView\":\n    from dvc.stage.exceptions import StageFileDoesNotExistError, StageNotFound\n    from dvc.utils import parse_target\n\n    index: Optional[Index] = None\n    if targets and all(targets) and not with_deps and not recursive:\n        indexes: list[Index] = []\n        try:\n            for target in targets:\n                if not target:\n                    continue\n                file, name = parse_target(target)\n                if file and not name:\n                    index = Index.from_file(repo, file)\n                else:\n                    stages = repo.stage.collect(target)\n                    index = Index(repo, stages=list(stages))\n                indexes.append(index)\n        except (StageFileDoesNotExistError, StageNotFound):\n            pass\n        else:\n            index = Index.from_indexes(repo, indexes)\n            targets = None\n\n    if index is None:\n        index = repo.index\n    return index.targets_view(\n        targets,\n        stage_filter=stage_filter,\n        outs_filter=outs_filter,\n        max_size=max_size,\n        types=types,\n        recursive=recursive,\n        with_deps=with_deps,\n        **kwargs,\n    )\n"
  },
  {
    "path": "dvc/repo/init.py",
    "content": "import os\n\nfrom dvc.config import Config\nfrom dvc.exceptions import InitError, InvalidArgumentError\nfrom dvc.ignore import init as init_dvcignore\nfrom dvc.log import logger\nfrom dvc.repo import Repo\nfrom dvc.scm import SCM, SCMError\nfrom dvc.utils import relpath\nfrom dvc.utils.fs import remove\n\nlogger = logger.getChild(__name__)\n\n\ndef init(root_dir=os.curdir, no_scm=False, force=False, subdir=False):  # noqa: C901\n    \"\"\"\n    Creates an empty repo on the given directory -- basically a\n    `.dvc` directory with subdirectories for configuration and cache.\n\n    It should be tracked by a SCM or use the `--no-scm` flag.\n\n    If the given directory is not empty, you must use the `--force`\n    flag to override it.\n\n    Args:\n        root_dir: Path to repo's root directory.\n\n    Returns:\n        Repo instance.\n\n    Raises:\n        KeyError: Raises an exception.\n    \"\"\"\n\n    if no_scm and subdir:\n        raise InvalidArgumentError(\n            \"Cannot initialize repo with `--no-scm` and `--subdir`\"\n        )\n\n    root_dir = os.path.abspath(root_dir)\n    dvc_dir = os.path.join(root_dir, Repo.DVC_DIR)\n\n    try:\n        scm = SCM(root_dir, search_parent_directories=subdir, no_scm=no_scm)\n    except SCMError:\n        raise InitError(  # noqa: B904\n            f\"{root_dir} is not tracked by any supported SCM tool (e.g. Git). \"\n            \"Use `--no-scm` if you don't want to use any SCM or \"\n            \"`--subdir` if initializing inside a subdirectory of a parent SCM \"\n            \"repository.\"\n        )\n\n    if scm.is_ignored(dvc_dir):\n        raise InitError(\n            f\"{dvc_dir} is ignored by your SCM tool. \\n\"\n            \"Make sure that it's tracked, \"\n            \"for example, by adding '!.dvc' to .gitignore.\"\n        )\n\n    if os.path.isdir(dvc_dir):\n        if not force:\n            raise InitError(f\"'{relpath(dvc_dir)}' exists. 
Use `-f` to force.\")\n\n        remove(dvc_dir)\n\n    os.makedirs(dvc_dir, exist_ok=True)\n\n    config = Config.init(dvc_dir)\n\n    if no_scm:\n        with config.edit() as conf:\n            conf[\"core\"][\"no_scm\"] = True\n\n    dvcignore = init_dvcignore(root_dir)\n\n    proj = Repo(root_dir)\n\n    if os.path.isdir(proj.site_cache_dir):\n        proj.close()\n        try:\n            remove(proj.site_cache_dir)\n        except OSError:\n            logger.debug(\"failed to remove %s\", dvc_dir, exc_info=True)\n        proj = Repo(root_dir)\n\n    with proj.scm_context(autostage=True) as context:\n        files = [config.files[\"repo\"], dvcignore]\n        ignore_file = context.scm.ignore_file\n        if ignore_file:\n            files.extend([os.path.join(dvc_dir, ignore_file)])\n        proj.scm_context.track_file(files)\n\n    logger.info(\"Initialized DVC repository.\\n\")\n    if not no_scm:\n        logger.info(\"You can now commit the changes to git.\\n\")\n    return proj\n"
  },
  {
    "path": "dvc/repo/install.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom dvc import version_tuple\nfrom dvc.exceptions import DvcException\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.scm import Git\n\n\ndef pre_commit_install(scm: \"Git\") -> None:\n    import os\n\n    from dvc.utils.serialize import modify_yaml\n\n    config_path = os.path.join(scm.root_dir, \".pre-commit-config.yaml\")\n    with modify_yaml(config_path) as config:\n        entry = {\n            \"repo\": \"https://github.com/treeverse/dvc\",\n            \"rev\": \".\".join(map(str, version_tuple[:3])),\n            \"hooks\": [\n                {\n                    \"id\": \"dvc-pre-commit\",\n                    \"additional_dependencies\": [\".[all]\"],\n                    \"language_version\": \"python3\",\n                    \"stages\": [\"pre-commit\"],\n                },\n                {\n                    \"id\": \"dvc-pre-push\",\n                    \"additional_dependencies\": [\".[all]\"],\n                    \"language_version\": \"python3\",\n                    \"stages\": [\"pre-push\"],\n                },\n                {\n                    \"id\": \"dvc-post-checkout\",\n                    \"additional_dependencies\": [\".[all]\"],\n                    \"language_version\": \"python3\",\n                    \"stages\": [\"post-checkout\"],\n                    \"always_run\": True,\n                },\n            ],\n        }\n\n        config[\"repos\"] = config.get(\"repos\", [])\n        if entry not in config[\"repos\"]:\n            config[\"repos\"].append(entry)\n\n\ndef install_hooks(scm: \"Git\") -> None:\n    from scmrepo.exceptions import GitHookAlreadyExists\n\n    from dvc.utils import format_link\n\n    hooks = [\"post-checkout\", \"pre-commit\", \"pre-push\"]\n    for hook in hooks:\n        try:\n            scm.verify_hook(hook)\n        except GitHookAlreadyExists as exc:\n            link = 
format_link(\"https://man.dvc.org/install\")\n            raise DvcException(  # noqa: B904\n                f\"{exc}. Please refer to {link} for more info.\"\n            )\n\n    for hook in hooks:\n        scm.install_hook(hook, f\"exec dvc git-hook {hook} $@\")\n\n\ndef install(self: \"Repo\", use_pre_commit_tool: bool = False) -> None:\n    \"\"\"Adds dvc commands to SCM hooks for the repo.\n\n    If use_pre_commit_tool is set and pre-commit is installed it will be used\n    to install the hooks.\n    \"\"\"\n    from dvc.scm import Git\n\n    scm = self.scm\n    if not isinstance(scm, Git):\n        return None\n\n    driver = \"dvc git-hook merge-driver --ancestor %O --our %A --their %B \"\n    scm.install_merge_driver(\"dvc\", \"DVC merge driver\", driver)\n\n    if use_pre_commit_tool:\n        return pre_commit_install(scm)\n\n    return install_hooks(scm)\n"
  },
  {
    "path": "dvc/repo/ls.py",
    "content": "import os\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nif TYPE_CHECKING:\n    from dvc.fs.dvc import DVCFileSystem\n\n\ndef _open_repo(\n    url: str,\n    rev: Optional[str] = None,\n    config: Union[dict[str, Any], str, None] = None,\n    remote: Optional[str] = None,\n    remote_config: Optional[dict] = None,\n):\n    from dvc.config import Config\n\n    from . import Repo\n\n    if config and not isinstance(config, dict):\n        config_dict = Config.load_file(config)\n    else:\n        config_dict = None\n\n    return Repo.open(\n        url,\n        rev=rev,\n        subrepos=True,\n        uninitialized=True,\n        config=config_dict,\n        remote=remote,\n        remote_config=remote_config,\n    )\n\n\ndef _adapt_info(info: dict[str, Any]) -> dict[str, Any]:\n    dvc_info = info.get(\"dvc_info\", {})\n    return {\n        \"isout\": dvc_info.get(\"isout\", False),\n        \"isdir\": info[\"type\"] == \"directory\",\n        \"isexec\": info.get(\"isexec\", False),\n        \"size\": info.get(\"size\"),\n        \"md5\": dvc_info.get(\"md5\") or dvc_info.get(\"md5-dos2unix\"),\n    }\n\n\ndef ls(\n    url: str,\n    path: Optional[str] = None,\n    rev: Optional[str] = None,\n    recursive: Optional[bool] = None,\n    dvc_only: bool = False,\n    config: Union[dict[str, Any], str, None] = None,\n    remote: Optional[str] = None,\n    remote_config: Optional[dict] = None,\n    maxdepth: Optional[int] = None,\n):\n    \"\"\"Methods for getting files and outputs for the repo.\n\n    Args:\n        url (str): the repo url\n        path (str, optional): relative path into the repo\n        rev (str, optional): SHA commit, branch or tag name\n        recursive (bool, optional): recursively walk the repo\n        dvc_only (bool, optional): show only DVC-artifacts\n        config (str, optional): path to config file\n        remote (str, optional): remote name to set as a default remote in the repo\n        remote_config 
(str, dict): remote config to merge with a remote in the repo\n\n    Returns:\n        list of `entry`\n\n    Notes:\n        `entry` is a dictionary with structure\n        {\n            \"path\": str,\n            \"isout\": bool,\n            \"isdir\": bool,\n            \"isexec\": bool,\n        }\n    \"\"\"\n    with _open_repo(url, rev, config, remote, remote_config) as repo:\n        path = path or \"\"\n        fs: DVCFileSystem = repo.dvcfs\n        fs_path = fs.from_os_path(path)\n        return _ls(fs, fs_path, recursive, dvc_only, maxdepth)\n\n\ndef ls_tree(\n    url: str,\n    path: Optional[str] = None,\n    rev: Optional[str] = None,\n    dvc_only: bool = False,\n    config: Union[dict[str, Any], str, None] = None,\n    remote: Optional[str] = None,\n    remote_config: Optional[dict] = None,\n    maxdepth: Optional[int] = None,\n):\n    with _open_repo(url, rev, config, remote, remote_config) as repo:\n        path = path or \"\"\n        fs: DVCFileSystem = repo.dvcfs\n        fs_path = fs.from_os_path(path)\n        return _ls_tree(\n            fs, fs_path, maxdepth=maxdepth, dvc_only=dvc_only, dvcfiles=True\n        )\n\n\ndef _ls(\n    fs: \"DVCFileSystem\",\n    path: str,\n    recursive: Optional[bool] = None,\n    dvc_only: bool = False,\n    maxdepth: Optional[int] = None,\n):\n    fs_path = fs.info(path)[\"name\"]\n\n    infos = {}\n\n    # ignore maxdepth only if recursive is not set\n    maxdepth = maxdepth if recursive else None\n    if maxdepth == 0 or fs.isfile(fs_path):\n        infos[os.path.basename(path) or os.curdir] = fs.info(fs_path)\n    else:\n        for root, dirs, files in fs.walk(\n            fs_path,\n            dvcfiles=True,\n            dvc_only=dvc_only,\n            detail=True,\n            maxdepth=maxdepth,\n        ):\n            parts = fs.relparts(root, fs_path)\n            if parts == (\".\",):\n                parts = ()\n            if not recursive or (maxdepth and len(parts) >= maxdepth - 1):\n     
           files.update(dirs)\n\n            for name, entry in files.items():\n                infos[os.path.join(*parts, name)] = entry\n\n            if not recursive:\n                break\n\n    ret_list = []\n    for p, info in sorted(infos.items(), key=lambda x: x[0]):\n        _info = _adapt_info(info)\n        _info[\"path\"] = p\n        ret_list.append(_info)\n    return ret_list\n\n\ndef _ls_tree(fs, path, maxdepth=None, _info=None, **fs_kwargs):\n    info = _info or fs.info(path)\n    if _info is None:\n        # preserve the original path name\n        name = path\n        if not name:\n            name = os.curdir if fs.protocol == \"local\" else fs.root_marker\n        path = info[\"name\"]\n    else:\n        name = path.rsplit(fs.sep, 1)[-1]\n\n    ret = {}\n    ls_info = _adapt_info(info)\n    ls_info[\"path\"] = path\n\n    recurse = maxdepth is None or maxdepth > 0\n    if recurse and info[\"type\"] == \"directory\":\n        try:\n            infos = fs.ls(path, detail=True, **fs_kwargs)\n        except FileNotFoundError:\n            # broken symlink?\n            infos = []\n\n        infos.sort(key=lambda f: f[\"name\"])\n        maxdepth = maxdepth - 1 if maxdepth is not None else None\n        contents = {}\n        for info in infos:\n            d = _ls_tree(fs, info[\"name\"], maxdepth=maxdepth, _info=info, **fs_kwargs)\n            contents.update(d)\n        ls_info[\"contents\"] = contents\n\n    ret[name] = ls_info\n    return ret\n"
  },
  {
    "path": "dvc/repo/ls_url.py",
    "content": "from fsspec.implementations.local import LocalFileSystem as _LocalFileSystem\n\nfrom dvc.exceptions import URLMissingError\nfrom dvc.fs import LocalFileSystem, parse_external_url\n\n\ndef ls_url(url, *, fs_config=None, recursive=False, maxdepth=None, config=None):\n    fs, fs_path = parse_external_url(url, fs_config=fs_config, config=config)\n    try:\n        info = fs.info(fs_path)\n    except FileNotFoundError as exc:\n        raise URLMissingError(url) from exc\n    if maxdepth == 0 or info[\"type\"] != \"directory\":\n        return [{\"path\": info[\"name\"], \"isdir\": False}]\n\n    if isinstance(fs, LocalFileSystem):\n        # dvc's LocalFileSystem does not support maxdepth yet\n        walk = _LocalFileSystem().walk\n    else:\n        walk = fs.walk\n\n    ret = []\n    for root, dirs, files in walk(fs_path, detail=True, maxdepth=maxdepth):\n        parts = fs.relparts(root, fs_path)\n        if parts == (\".\",):\n            parts = ()\n        if not recursive or (maxdepth and len(parts) >= maxdepth - 1):\n            files.update(dirs)\n\n        for info in files.values():\n            ls_info = {\n                \"path\": fs.relpath(info[\"name\"], fs_path),\n                \"isdir\": info[\"type\"] == \"directory\",\n                \"size\": info.get(\"size\"),\n            }\n            ret.append(ls_info)\n\n        if not recursive:\n            break\n\n    return ret\n"
  },
  {
    "path": "dvc/repo/metrics/__init__.py",
    "content": "class Metrics:\n    def __init__(self, repo):\n        self.repo = repo\n\n    def show(self, *args, **kwargs):\n        from dvc.repo.metrics.show import show\n\n        return show(self.repo, *args, **kwargs)\n\n    def diff(self, *args, **kwargs):\n        from .diff import diff\n\n        return diff(self.repo, *args, **kwargs)\n"
  },
  {
    "path": "dvc/repo/metrics/diff.py",
    "content": "from typing import TYPE_CHECKING, TypedDict, Union\n\nfrom funcy import compact\n\nfrom dvc.utils.diff import diff as _diff_dict\nfrom dvc.utils.diff import format_dict\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\n    from .show import Result\n\n\nclass DiffResult(TypedDict, total=False):\n    errors: dict[str, Union[Exception, dict[str, Exception]]]\n    diff: dict[str, dict[str, dict]]\n\n\ndef _diff(\n    result: dict[str, \"Result\"],\n    old_rev: str,\n    new_rev: str,\n    **kwargs,\n) -> DiffResult:\n    old = result.get(old_rev, {})\n    new = result.get(new_rev, {})\n\n    old_data = old.get(\"data\", {})\n    new_data = new.get(\"data\", {})\n\n    res = DiffResult()\n    errors = res.setdefault(\"errors\", {})\n\n    if old_error := old.get(\"error\"):\n        errors[old_rev] = old_error\n    else:\n        errors[old_rev] = {f: d[\"error\"] for f, d in old_data.items() if \"error\" in d}\n\n    if new_error := new.get(\"error\"):\n        errors[new_rev] = new_error\n    else:\n        errors[new_rev] = {f: d[\"error\"] for f, d in new_data.items() if \"error\" in d}\n\n    diff_data = _diff_dict(format_dict(old_data), format_dict(new_data), **kwargs)\n    res = DiffResult(errors=errors, diff=diff_data)\n    res[\"errors\"] = compact(res.get(\"errors\", {}))  # type: ignore[assignment]\n    return compact(res)  # type: ignore[no-any-return]\n\n\ndef diff(\n    repo: \"Repo\",\n    a_rev: str = \"HEAD\",\n    b_rev: str = \"workspace\",\n    all: bool = False,  # noqa: A002\n    **kwargs,\n) -> DiffResult:\n    if repo.scm.no_commits:\n        return {}\n\n    metrics = repo.metrics.show(revs=[a_rev, b_rev], hide_workspace=False, **kwargs)\n    return _diff(metrics, a_rev, b_rev, with_unchanged=all)\n"
  },
  {
    "path": "dvc/repo/metrics/show.py",
    "content": "import logging\nimport os\nfrom collections.abc import Iterable, Iterator\nfrom itertools import chain\nfrom typing import TYPE_CHECKING, Any, Optional, TypedDict, Union\n\nfrom funcy import ldistinct\nfrom scmrepo.exceptions import SCMError\n\nfrom dvc.log import logger\nfrom dvc.scm import NoSCMError\nfrom dvc.utils import as_posix\nfrom dvc.utils.collections import ensure_list\nfrom dvc.utils.serialize import load_path\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n    from dvc.output import Output\n    from dvc.repo import Repo\n    from dvc.scm import Git, NoSCM\n\nlogger = logger.getChild(__name__)\n\n\ndef _collect_top_level_metrics(repo: \"Repo\") -> Iterator[str]:\n    top_metrics = repo.index._metrics\n    for dvcfile, metrics in top_metrics.items():\n        wdir = repo.fs.relpath(repo.fs.parent(dvcfile), repo.root_dir)\n        for file in metrics:\n            path = repo.fs.join(wdir, as_posix(file))\n            yield repo.fs.normpath(path)\n\n\ndef _extract_metrics(metrics, path: str):\n    if isinstance(metrics, (int, float, str)):\n        return metrics\n\n    if not isinstance(metrics, dict):\n        return None\n\n    ret = {}\n    for key, val in metrics.items():\n        m = _extract_metrics(val, path)\n        if m not in (None, {}):\n            ret[key] = m\n        else:\n            logger.debug(\n                \"Could not parse %r metric from %r due to its unsupported type: %r\",\n                key,\n                path,\n                type(val).__name__,\n            )\n\n    return ret\n\n\ndef _read_metric(fs: \"FileSystem\", path: str, **load_kwargs) -> Any:\n    val = load_path(path, fs, **load_kwargs)\n    val = _extract_metrics(val, path)\n    return val or {}\n\n\ndef _read_metrics(\n    fs: \"FileSystem\", metrics: Iterable[str], **load_kwargs\n) -> Iterator[tuple[str, Union[Exception, Any]]]:\n    for metric in metrics:\n        try:\n            yield metric, _read_metric(fs, metric, 
**load_kwargs)\n        except Exception as exc:  # noqa: BLE001\n            logger.debug(exc)\n            yield metric, exc\n\n\ndef metrics_from_target(repo: \"Repo\", targets: list[str]) -> Iterator[\"Output\"]:\n    stages = chain.from_iterable(repo.stage.collect(target) for target in targets)\n    for stage in stages:\n        yield from stage.metrics\n\n\ndef _collect_metrics(\n    repo: \"Repo\",\n    targets: Optional[list[str]] = None,\n    stages: Optional[list[str]] = None,\n    outs_only: bool = False,\n) -> list[str]:\n    metrics: list[str] = []\n\n    if targets:\n        # target is a repo-relative path\n        metrics.extend(targets)\n\n    if not targets or outs_only:\n        outs = metrics_from_target(repo, stages) if stages else repo.index.metrics\n        relpath = repo.fs.relpath\n        metrics.extend(relpath(out.fs_path, repo.root_dir) for out in outs)\n\n    if not targets and not outs_only and not stages:\n        # _collect_top_level_metrics returns repo-relative paths\n        metrics.extend(_collect_top_level_metrics(repo))\n\n    fs = repo.dvcfs\n\n    # convert to posixpath for DVCFileSystem\n    paths = (fs.from_os_path(metric) for metric in metrics)\n    # make paths absolute for DVCFileSystem\n    repo_paths = (f\"{fs.root_marker}{path}\" for path in paths)\n    return ldistinct(try_expand_paths(fs, repo_paths))\n\n\nclass FileResult(TypedDict, total=False):\n    data: Any\n    error: Exception\n\n\nclass Result(TypedDict, total=False):\n    data: dict[str, FileResult]\n    error: Exception\n\n\ndef try_expand_paths(fs: \"FileSystem\", paths: Iterable[str]) -> Iterator[str]:\n    for path in paths:\n        try:\n            if fs.isdir(path):\n                yield from fs.find(path)\n                continue\n        except Exception as e:\n            logger.debug(\n                \"failed to expand %r: %s\",\n                path,\n                e,\n                exc_info=logger.isEnabledFor(logging.TRACE),  # type: 
ignore[attr-defined]\n            )\n        yield path\n\n\ndef to_relpath(fs: \"FileSystem\", root_dir: str, d: Result) -> Result:\n    relpath = fs.relpath\n    cwd = fs.getcwd()\n\n    start = relpath(cwd, root_dir)\n    data = d.get(\"data\")\n    if data is not None:\n        d[\"data\"] = {relpath(path, start): result for path, result in data.items()}\n    return d\n\n\ndef _gather_metrics(\n    repo: \"Repo\",\n    targets: Optional[list[str]] = None,\n    outs_only: bool = False,\n    stages: Optional[list[str]] = None,\n    on_error: str = \"return\",\n) -> dict[str, FileResult]:\n    assert on_error in (\"raise\", \"return\", \"ignore\")\n\n    # `files` is a repo-relative posixpath that can be passed to DVCFileSystem\n    # It is absolute, i.e. has a root_marker `/` in front which we strip when returning\n    # the result and convert to appropriate repo-relative os.path.\n    files = _collect_metrics(repo, targets=targets, stages=stages, outs_only=outs_only)\n    data = {}\n\n    fs = repo.dvcfs\n    for fs_path, result in _read_metrics(fs, files, cache=True):\n        repo_path = fs_path.lstrip(fs.root_marker)\n        repo_os_path = os.sep.join(fs.parts(repo_path))\n        if not isinstance(result, Exception):\n            data.update({repo_os_path: FileResult(data=result)})\n            continue\n\n        if on_error == \"raise\":\n            raise result\n        if on_error == \"return\":\n            data.update({repo_os_path: FileResult(error=result)})\n    return data\n\n\ndef _hide_workspace(\n    scm: Union[\"Git\", \"NoSCM\"], res: dict[str, Result]\n) -> dict[str, Result]:\n    # Hide workspace params if they are the same as in the active branch\n    try:\n        active_branch = scm.active_branch()\n    except (SCMError, NoSCMError):\n        # SCMError - detached head\n        # NoSCMError - no repo case\n        pass\n    else:\n        if res.get(\"workspace\") == res.get(active_branch):\n            res.pop(\"workspace\", None)\n\n   
 return res\n\n\ndef show(\n    repo: \"Repo\",\n    targets: Optional[list[str]] = None,\n    stages: Optional[list[str]] = None,\n    outs_only: bool = False,\n    all_branches: bool = False,\n    all_tags: bool = False,\n    revs: Optional[list[str]] = None,\n    all_commits: bool = False,\n    hide_workspace: bool = True,\n    on_error: str = \"return\",\n) -> dict[str, Result]:\n    assert on_error in (\"raise\", \"return\", \"ignore\")\n\n    targets = [os.path.abspath(target) for target in ensure_list(targets)]\n    targets = [repo.dvcfs.from_os_path(target) for target in targets]\n\n    res = {}\n    for rev in repo.brancher(\n        revs=revs,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n    ):\n        try:\n            result = _gather_metrics(\n                repo,\n                targets=targets,\n                stages=stages,\n                outs_only=outs_only,\n                on_error=on_error,\n            )\n            res[rev] = Result(data=result)\n        except Exception as exc:\n            if on_error == \"raise\":\n                raise\n\n            logger.warning(\"failed to load metrics in revision %r, %s\", rev, str(exc))\n            if on_error == \"return\":\n                res[rev] = Result(error=exc)\n\n    if hide_workspace:\n        _hide_workspace(repo.scm, res)\n    return res\n"
  },
  {
    "path": "dvc/repo/move.py",
    "content": "import os\nfrom typing import TYPE_CHECKING\n\nfrom dvc.repo.scm_context import scm_context\n\nfrom . import locked\n\nif TYPE_CHECKING:\n    from . import Repo\n\n\ndef _expand_target_path(from_path, to_path):\n    if os.path.isdir(to_path):\n        return os.path.join(to_path, os.path.basename(from_path))\n    return to_path\n\n\n@locked\n@scm_context\ndef move(self: \"Repo\", from_path, to_path):\n    \"\"\"\n    Renames an output file and modifies the stage associated\n    to reflect the change on the pipeline.\n\n    If the output has the same name as its stage, it would\n    also rename the corresponding .dvc file.\n\n    E.g.\n          Having: (hello, hello.dvc)\n\n          $ dvc move hello greetings\n\n          Result: (greetings, greetings.dvc)\n\n    It only works with outputs generated by `add` or `import`,\n    also known as data sources.\n    \"\"\"\n    from dvc import dependency, output\n    from dvc.dvcfile import DVC_FILE_SUFFIX\n    from dvc.exceptions import MoveNotDataSourceError\n    from dvc.stage import Stage\n    from dvc.stage.exceptions import StageFileAlreadyExistsError\n    from dvc_objects.fs.local import LocalFileSystem\n\n    from_out = output.loads_from(Stage(self), [from_path])[0]\n    assert from_out.protocol == \"local\"\n\n    to_path = _expand_target_path(from_path, to_path)\n\n    outs = self.find_outs_by_path(from_out.fspath)\n    assert len(outs) == 1\n    out = outs[0]\n    stage = out.stage\n    deps = stage.deps\n\n    if not stage.is_data_source:\n        raise MoveNotDataSourceError(stage.addressing)\n\n    stage_name = os.path.splitext(os.path.basename(stage.path))[0]\n    from_name = os.path.basename(from_out.fspath)\n    if stage_name == from_name:\n        new_fname = os.path.join(\n            os.path.dirname(to_path),\n            os.path.basename(to_path) + DVC_FILE_SUFFIX,\n        )\n        new_wdir = os.path.abspath(os.path.join(os.curdir, os.path.dirname(to_path)))\n        to_path = 
os.path.relpath(to_path, new_wdir)\n        try:\n            new_stage = self.stage.create(\n                single_stage=True,\n                fname=new_fname,\n                wdir=new_wdir,\n                outs=[to_path],\n                meta=stage.meta,\n                frozen=stage.frozen,\n                always_changed=stage.always_changed,\n                desc=stage.desc,\n            )\n        except StageFileAlreadyExistsError:\n            # reraise to remove `--force` hint\n            raise StageFileAlreadyExistsError(f\"{new_fname!r} already exists\") from None\n    else:\n        new_stage = stage\n        to_path = os.path.relpath(to_path, stage.wdir)\n\n    def with_dep_path_adjusted(dep: dependency.Dependency):\n        d = dep.dumpd()\n        if isinstance(dep.fs, LocalFileSystem) and not os.path.isabs(dep.def_path):\n            return d | {\"path\": os.path.relpath(dep.fspath, new_stage.wdir)}\n        return d\n\n    new_stage.outs = output.loadd_from(new_stage, [out.dumpd() | {\"path\": to_path}])\n    new_stage.deps = dependency.loadd_from(\n        new_stage, [with_dep_path_adjusted(dep) for dep in deps]\n    )\n    out.move(new_stage.outs[0])\n    new_stage.md5 = new_stage.compute_md5()\n    new_stage.dump()\n    if stage != new_stage:\n        stage.dvcfile.remove()\n        self.scm_context.track_file(stage.dvcfile.relpath)\n    return stage, new_stage\n"
  },
  {
    "path": "dvc/repo/open_repo.py",
    "content": "import os\nimport tempfile\nimport threading\nfrom typing import TYPE_CHECKING, Optional\n\nfrom funcy import retry, wrap_with\n\nfrom dvc.exceptions import NotDvcRepoError\nfrom dvc.log import logger\nfrom dvc.repo import Repo\nfrom dvc.scm import CloneError, map_scm_exception\nfrom dvc.utils import relpath\n\nif TYPE_CHECKING:\n    from dvc.scm import Git\n\nlogger = logger.getChild(__name__)\n\n\n@map_scm_exception()\ndef _external_repo(url, rev: Optional[str] = None, **kwargs) -> \"Repo\":\n    logger.debug(\"Creating external repo %s@%s\", url, rev)\n    path = _cached_clone(url, rev)\n    # Local HEAD points to the tip of whatever branch we first cloned from\n    # (which may not be the default branch), use origin/HEAD here to get\n    # the tip of the default branch\n    rev = rev or \"refs/remotes/origin/HEAD\"\n\n    config = _get_remote_config(url) if os.path.isdir(url) else {}\n    config.update({\"cache\": {\"dir\": _get_cache_dir(url)}})\n    config.update(kwargs.pop(\"config\", None) or {})\n\n    main_root = \"/\"\n    repo_kwargs = dict(\n        root_dir=path,\n        url=url,\n        config=config,\n        repo_factory=erepo_factory(url, main_root, {\"cache\": config[\"cache\"]}),\n        rev=rev,\n        **kwargs,\n    )\n\n    return Repo(**repo_kwargs)\n\n\ndef open_repo(url, *args, **kwargs):\n    if url is None:\n        url = os.getcwd()\n\n    if os.path.exists(url):\n        url = os.path.abspath(url)\n        try:\n            config = _get_remote_config(url)\n            config.update(kwargs.get(\"config\") or {})\n            kwargs[\"config\"] = config\n            return Repo(url, *args, **kwargs)\n        except NotDvcRepoError:\n            pass  # fallthrough to _external_repo\n\n    return _external_repo(url, *args, **kwargs)\n\n\ndef erepo_factory(url, root_dir, cache_config):\n    from dvc.fs import localfs\n\n    def make_repo(path, fs=None, **_kwargs):\n        _config = cache_config.copy()\n        if 
os.path.isdir(url):\n            fs = fs or localfs\n            repo_path = os.path.join(url, *fs.relparts(path, root_dir))\n            _config.update(_get_remote_config(repo_path))\n        return Repo(path, fs=fs, config=_config, **_kwargs)\n\n    return make_repo\n\n\nCLONES: dict[str, tuple[str, bool]] = {}\nCACHE_DIRS: dict[str, str] = {}\n\n\n@wrap_with(threading.Lock())\ndef _get_cache_dir(url):\n    try:\n        cache_dir = CACHE_DIRS[url]\n    except KeyError:\n        cache_dir = CACHE_DIRS[url] = tempfile.mkdtemp(\"dvc-cache\")\n    return cache_dir\n\n\ndef clean_repos():\n    # Outside code should not see cache while we are removing\n    paths = [path for path, _ in CLONES.values()] + list(CACHE_DIRS.values())\n    CLONES.clear()\n    CACHE_DIRS.clear()\n\n    for path in paths:\n        _remove(path)\n\n\ndef _get_remote_config(url):\n    try:\n        repo = Repo(url, uninitialized=True)\n    except NotDvcRepoError:\n        return {}\n\n    try:\n        name = repo.config[\"core\"].get(\"remote\")\n        if not name:\n            # Fill the empty upstream entry with a new remote pointing to the\n            # original repo's cache location.\n            name = \"auto-generated-upstream\"\n            try:\n                local_cache_dir = repo.cache.local_cache_dir\n            except AttributeError:\n                # if the `.dvc` dir is missing, we get an AttributeError\n                return {}\n            else:\n                return {\n                    \"core\": {\"remote\": name},\n                    \"remote\": {name: {\"url\": local_cache_dir}},\n                }\n\n        # Use original remote to make sure that we are using correct url,\n        # credential paths, etc if they are relative to the config location.\n        return {\"remote\": {name: repo.config[\"remote\"][name]}}\n    finally:\n        repo.close()\n\n\ndef _cached_clone(url, rev):\n    \"\"\"Clone an external git repo to a temporary directory.\n\n    
Returns the path to a local temporary directory with the specified\n    revision checked out.\n    \"\"\"\n    from shutil import copytree\n\n    # even if we have already cloned this repo, we may need to\n    # fetch/fast-forward to get specified rev\n    clone_path, shallow = _clone_default_branch(url, rev)\n\n    if url in CLONES:\n        return CLONES[url][0]\n\n    # Copy to a new dir to keep the clone clean\n    repo_path = tempfile.mkdtemp(\"dvc-erepo\")\n    logger.debug(\"erepo: making a copy of %s clone\", url)\n    copytree(clone_path, repo_path)\n\n    CLONES[url] = (repo_path, shallow)\n    return repo_path\n\n\n@wrap_with(threading.Lock())\ndef _clone_default_branch(url, rev):\n    \"\"\"Get or create a clean clone of the url.\n\n    The clone is refreshed with git pull unless rev is a known sha.\n    \"\"\"\n    from dvc.scm import Git\n\n    clone_path, shallow = CLONES.get(url) or (None, False)\n\n    git = None\n    try:\n        if clone_path:\n            git = Git(clone_path)\n            # Do not pull for known shas, branches and tags might move\n            if not Git.is_sha(rev) or not git.has_rev(rev):\n                if shallow:\n                    # If we are missing a rev in a shallow clone, fallback to\n                    # a full (unshallowed) clone. 
Since fetching specific rev\n                    # SHAs is only available in certain git versions, if we\n                    # need to reference multiple specific revs for a\n                    # given repo URL it is easier/safer for us to work with\n                    # full clones in this case.\n                    logger.debug(\"erepo: unshallowing clone for '%s'\", url)\n                    _pull(git, unshallow=True)\n                    shallow = False\n                    CLONES[url] = (clone_path, shallow)\n                else:\n                    logger.debug(\"erepo: git pull '%s'\", url)\n                    _pull(git)\n        else:\n            from dvc.scm import clone\n\n            logger.debug(\"erepo: git clone '%s' to a temporary dir\", url)\n            clone_path = tempfile.mkdtemp(\"dvc-clone\")\n            if rev and not Git.is_sha(rev):\n                # If rev is a tag or branch name try shallow clone first\n\n                try:\n                    git = clone(url, clone_path, shallow_branch=rev)\n                    shallow = os.path.exists(\n                        os.path.join(clone_path, Git.GIT_DIR, \"shallow\")\n                    )\n                    if shallow:\n                        logger.debug(\"erepo: using shallow clone for branch '%s'\", rev)\n                except CloneError:\n                    git_dir = os.path.join(clone_path, \".git\")\n                    if os.path.exists(git_dir):\n                        _remove(git_dir)\n            if not git:\n                git = clone(url, clone_path)\n                shallow = False\n            CLONES[url] = (clone_path, shallow)\n    finally:\n        if git:\n            git.close()\n\n    return clone_path, shallow\n\n\ndef _pull(git: \"Git\", unshallow: bool = False):\n    from dvc.repo.experiments.utils import fetch_all_exps\n\n    git.fetch(unshallow=unshallow)\n    _merge_upstream(git)\n    fetch_all_exps(git, \"origin\")\n\n\ndef 
_merge_upstream(git: \"Git\"):\n    from scmrepo.exceptions import SCMError\n\n    try:\n        branch = git.active_branch()\n        upstream = f\"refs/remotes/origin/{branch}\"\n        if git.get_ref(upstream):\n            git.merge(upstream)\n    except SCMError:\n        pass\n\n\ndef _remove(path):\n    from dvc.utils.fs import remove\n\n    if os.name == \"nt\":\n        # git.exe may hang for a while, not permitting removal of the temp dir\n        os_retry = retry(5, errors=OSError, timeout=0.1)\n        try:\n            os_retry(remove)(path)\n        except PermissionError:\n            logger.warning(\"Failed to remove '%s'\", relpath(path), exc_info=True)\n    else:\n        remove(path)\n"
  },
  {
    "path": "dvc/repo/params/__init__.py",
    "content": "class Params:\n    def __init__(self, repo):\n        self.repo = repo\n\n    def show(self, *args, **kwargs):\n        from .show import show\n\n        return show(self.repo, *args, **kwargs)\n\n    def diff(self, *args, **kwargs):\n        from .diff import diff\n\n        return diff(self.repo, *args, **kwargs)\n"
  },
  {
    "path": "dvc/repo/params/diff.py",
    "content": "from typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.repo.metrics.diff import DiffResult\n\n\ndef diff(\n    repo: \"Repo\",\n    a_rev: str = \"HEAD\",\n    b_rev: str = \"workspace\",\n    all: bool = False,  # noqa: A002\n    **kwargs,\n) -> \"DiffResult\":\n    if repo.scm.no_commits:\n        return {}\n\n    from dvc.repo.metrics.diff import _diff\n\n    params = repo.params.show(revs=[a_rev, b_rev], hide_workspace=False, **kwargs)\n    return _diff(params, a_rev, b_rev, with_unchanged=all)\n"
  },
  {
    "path": "dvc/repo/params/show.py",
    "content": "import os\nfrom collections import defaultdict\nfrom collections.abc import Iterator\nfrom itertools import chain\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nfrom dvc.dependency.param import ParamsDependency, read_param_file\nfrom dvc.log import logger\nfrom dvc.repo.metrics.show import FileResult, Result, try_expand_paths\nfrom dvc.stage import PipelineStage\nfrom dvc.utils import as_posix\nfrom dvc.utils.collections import ensure_list\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n    from dvc.repo import Repo\n\nlogger = logger.getChild(__name__)\n\n\ndef _collect_top_level_params(repo: \"Repo\") -> Iterator[str]:\n    top_params = repo.index._params\n    for dvcfile, params in top_params.items():\n        wdir = repo.fs.relpath(repo.fs.parent(dvcfile), repo.root_dir)\n        for file in params:\n            path = repo.fs.join(wdir, as_posix(file))\n            yield repo.fs.normpath(path)\n\n\ndef params_from_target(\n    repo: \"Repo\", targets: list[str]\n) -> Iterator[\"ParamsDependency\"]:\n    stages = chain.from_iterable(repo.stage.collect(target) for target in targets)\n    for stage in stages:\n        yield from stage.params\n\n\ndef _collect_params(\n    repo: \"Repo\",\n    targets: Union[list[str], dict[str, list[str]], None] = None,\n    stages: Optional[list[str]] = None,\n    deps_only: bool = False,\n    default_file: Optional[str] = None,\n) -> dict[str, list[str]]:\n    from dvc.dependency import _merge_params\n\n    if isinstance(targets, list):\n        targets = {target: [] for target in targets}\n\n    params: list[dict[str, list[str]]] = []\n\n    if targets:\n        # target is a repo-relative path\n        params.extend({file: params} for file, params in targets.items())\n\n    if not targets or stages:\n        deps = params_from_target(repo, stages) if stages else repo.index.params\n        relpath = repo.fs.relpath\n        params.extend(\n            {relpath(dep.fs_path, repo.root_dir): 
list(dep.params)} for dep in deps\n        )\n\n    fs = repo.dvcfs\n\n    if not targets and not deps_only and not stages:\n        # _collect_top_level_params returns repo-relative paths\n        params.extend({param: []} for param in _collect_top_level_params(repo))\n        if default_file and fs.exists(f\"{fs.root_marker}{default_file}\"):\n            params.append({default_file: []})\n\n    # combine all the param files and the keypaths to track\n    all_params = _merge_params(params)\n\n    ret = {}\n    for param, _params in all_params.items():\n        # convert to posixpath for DVCFileSystem\n        path = fs.from_os_path(param)\n        # make paths absolute for DVCFileSystem\n        repo_path = f\"{fs.root_marker}{path}\"\n        ret.update(dict.fromkeys(try_expand_paths(fs, [repo_path]), _params))\n    return ret\n\n\ndef _collect_vars(repo, params, stages=None) -> dict:\n    vars_params: dict[str, dict] = defaultdict(dict)\n\n    for stage in repo.index.stages:\n        if isinstance(stage, PipelineStage) and stage.tracked_vars:\n            if stages and stage.addressing not in stages:\n                continue\n            for file, vars_ in stage.tracked_vars.items():\n                # `params` file are shown regardless of `tracked` or not\n                # to reduce noise and duplication, they are skipped\n\n                # `file` is relative\n                abspath = repo.fs.abspath(file)\n                repo_path = repo.dvcfs.from_os_path(abspath)\n                if repo_path in params:\n                    continue\n\n                vars_params[repo_path].update(vars_)\n    return dict(vars_params)\n\n\ndef _read_params(\n    fs: \"FileSystem\", params: dict[str, list[str]], **load_kwargs\n) -> Iterator[tuple[str, Union[Exception, Any]]]:\n    for file_path, key_paths in params.items():\n        try:\n            yield file_path, read_param_file(fs, file_path, key_paths, **load_kwargs)\n        except Exception as exc:  # noqa: 
BLE001\n            logger.debug(exc)\n            yield file_path, exc\n\n\ndef _gather_params(\n    repo: \"Repo\",\n    targets: Union[list[str], dict[str, list[str]], None] = None,\n    deps_only: bool = False,\n    stages: Optional[list[str]] = None,\n    on_error: str = \"return\",\n):\n    assert on_error in (\"raise\", \"return\", \"ignore\")\n\n    # `files` is a repo-relative posixpath that can be passed to DVCFileSystem\n    # It is absolute, i.e. has a root_marker `/` in front which we strip when returning\n    # the result and convert to appropriate repo-relative os.path.\n    files_keypaths = _collect_params(\n        repo,\n        targets=targets,\n        stages=stages,\n        deps_only=deps_only,\n        default_file=ParamsDependency.DEFAULT_PARAMS_FILE,\n    )\n\n    data: dict[str, FileResult] = {}\n\n    fs = repo.dvcfs\n    for fs_path, result in _read_params(fs, files_keypaths, cache=True):\n        repo_path = fs_path.lstrip(fs.root_marker)\n        repo_os_path = os.sep.join(fs.parts(repo_path))\n        if not isinstance(result, Exception):\n            data.update({repo_os_path: FileResult(data=result)})\n            continue\n\n        if on_error == \"raise\":\n            raise result\n        if on_error == \"return\":\n            data.update({repo_os_path: FileResult(error=result)})\n\n    if not (stages or targets):\n        data.update(\n            {\n                path: FileResult(data=result)\n                for path, result in _collect_vars(repo, data).items()\n            }\n        )\n    return data\n\n\ndef show(\n    repo: \"Repo\",\n    targets: Optional[list[str]] = None,\n    stages: Optional[list[str]] = None,\n    deps_only: bool = False,\n    all_branches: bool = False,\n    all_tags: bool = False,\n    revs: Optional[list[str]] = None,\n    all_commits: bool = False,\n    hide_workspace: bool = True,\n    on_error: str = \"return\",\n) -> dict[str, Result]:\n    assert on_error in (\"raise\", \"return\", 
\"ignore\")\n    res = {}\n\n    targets = ensure_list(targets)\n    targets = [repo.dvcfs.from_os_path(target) for target in targets]\n\n    for rev in repo.brancher(\n        revs=revs,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n    ):\n        try:\n            params = _gather_params(\n                repo=repo,\n                targets=targets,\n                stages=stages,\n                deps_only=deps_only,\n                on_error=on_error,\n            )\n            res[rev] = Result(data=params)\n        except Exception as exc:\n            if on_error == \"raise\":\n                raise\n            logger.warning(\"failed to load params in revision %r, %s\", rev, str(exc))\n            if on_error == \"return\":\n                res[rev] = Result(error=exc)\n\n    if hide_workspace:\n        from dvc.repo.metrics.show import _hide_workspace\n\n        _hide_workspace(repo.scm, res)\n    return res\n"
  },
  {
    "path": "dvc/repo/plots/__init__.py",
    "content": "import csv\nimport io\nimport os\nfrom collections import defaultdict\nfrom collections.abc import Iterator\nfrom copy import deepcopy\nfrom functools import partial\nfrom multiprocessing import cpu_count\nfrom typing import TYPE_CHECKING, Any, Callable, Optional, Union\n\nimport dpath\nimport dpath.options\nfrom funcy import first, ldistinct, project, reraise\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.utils import error_handler, errored_revisions\nfrom dvc.utils.objects import cached_property\nfrom dvc.utils.serialize import PARSERS, EncodingError\nfrom dvc.utils.threadpool import ThreadPoolExecutor\nfrom dvc_render.image import ImageRenderer\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n    from dvc.output import Output\n    from dvc.repo import Repo\n    from dvc.types import DictStrAny, StrPath\n\ndpath.options.ALLOW_EMPTY_STRING_KEYS = True\n\nlogger = logger.getChild(__name__)\n\n\ndef onerror_collect(result: dict, exception: Exception, *args, **kwargs):\n    logger.debug(\"\", exc_info=True)  # noqa: LOG014\n    result[\"error\"] = exception\n\n\nSUPPORTED_IMAGE_EXTENSIONS = ImageRenderer.EXTENSIONS\n\n\nclass PlotMetricTypeError(DvcException):\n    def __init__(self, file):\n        super().__init__(\n            f\"'{file}' - file type error\\n\"\n            \"Only JSON, YAML, CSV and TSV formats are supported.\"\n        )\n\n\nclass NotAPlotError(DvcException):\n    def __init__(self, out):\n        super().__init__(\n            f\"'{out}' is not a known plot. 
Use `dvc plots modify` to turn it into one.\"\n        )\n\n\nclass PropsNotFoundError(DvcException):\n    pass\n\n\n@error_handler\ndef _unpack_dir_files(fs, path, **kwargs):\n    ret = list(fs.find(path))\n    if not ret:\n        # This will raise FileNotFoundError if it is a broken symlink or TreeError\n        next(iter(fs.ls(path)), None)\n    return ret\n\n\nclass Plots:\n    def __init__(self, repo):\n        self.repo = repo\n\n    def collect(\n        self,\n        targets: Optional[list[str]] = None,\n        revs: Optional[list[str]] = None,\n        recursive: bool = False,\n        onerror: Optional[Callable] = None,\n        props: Optional[dict] = None,\n    ) -> Iterator[dict]:\n        \"\"\"Collects plots definitions and data sources.\n\n        Generator yielding a structure like:\n            {\n                revision:\n                {\n                    \"definitions\":\n                    {\n                        \"data\":\n                        {\n                            \"config_file\":\n                            {\n                                \"data\":\n                                {\n                                    plot_id:\n                                    {\n                                        plot_config\n                                    }\n                                }\n                            }\n                        }\n                    },\n                    \"sources\":\n                    {\n                        \"data\":\n                        {\n                            \"filename\":\n                            {\n                                \"data_source\": callable loading the data,\n                                \"props\": properties for the file if it is\n                                         plots type output\n                            }\n                        }\n                    }\n                }\n\n            }\n        \"\"\"\n        
from dvc.repo.experiments.brancher import switch_repo\n        from dvc.utils.collections import ensure_list\n\n        targets = ensure_list(targets)\n        targets = [self.repo.dvcfs.from_os_path(target) for target in targets]\n\n        if revs is None:\n            revs = [\"workspace\"]\n        else:\n            revs = list(revs)\n            if \"workspace\" in revs:\n                # reorder revs to match repo.brancher ordering\n                revs.remove(\"workspace\")\n                revs = [\"workspace\", *revs]\n        for rev in revs:\n            with switch_repo(self.repo, rev) as (repo, _):\n                res: dict = {}\n                definitions = _collect_definitions(\n                    repo,\n                    targets=targets,\n                    revision=rev,\n                    onerror=onerror,\n                    props=props,\n                )\n                if definitions:\n                    res[rev] = {\"definitions\": definitions}\n\n                    data_targets = _get_data_targets(definitions)\n\n                    res[rev][\"sources\"] = self._collect_data_sources(\n                        repo,\n                        targets=data_targets,\n                        recursive=recursive,\n                        props=props,\n                        onerror=onerror,\n                    )\n                yield res\n\n    @error_handler\n    def _collect_data_sources(\n        self,\n        repo: \"Repo\",\n        targets: Optional[list[str]] = None,\n        recursive: bool = False,\n        props: Optional[dict] = None,\n        onerror: Optional[Callable] = None,\n    ):\n        fs = repo.dvcfs\n\n        props = props or {}\n\n        plots = _collect_plots(repo, targets, recursive)\n        res: dict[str, Any] = {}\n        for fs_path, rev_props in plots.items():\n            joined_props = rev_props | props\n            res[fs_path] = {\"props\": joined_props}\n            res[fs_path].update(\n        
        {\n                    \"data_source\": partial(\n                        parse,\n                        fs,\n                        fs_path,\n                        props=joined_props,\n                        onerror=onerror,\n                    )\n                }\n            )\n        return res\n\n    def show(\n        self,\n        targets: Optional[list[str]] = None,\n        revs=None,\n        props=None,\n        recursive=False,\n        onerror=None,\n    ):\n        if onerror is None:\n            onerror = onerror_collect\n\n        result: dict[str, dict] = {}\n        for data in self.collect(\n            targets,\n            revs,\n            recursive,\n            onerror=onerror,\n            props=props,\n        ):\n            short_rev = \"workspace\"\n            if rev := getattr(self.repo.fs, \"rev\", None):\n                short_rev = rev[:7]\n            _resolve_data_sources(data, short_rev, cache=True)\n            result.update(data)\n\n        errored = errored_revisions(result)\n        if errored:\n            from dvc.ui import ui\n\n            ui.error_write(\n                \"DVC failed to load some plots for following revisions: \"\n                f\"'{', '.join(errored)}'.\"\n            )\n\n        return result\n\n    def diff(self, *args, **kwargs):\n        from .diff import diff\n\n        return diff(self.repo, *args, **kwargs)\n\n    @staticmethod\n    def _unset(out, props):\n        missing = list(set(props) - set(out.plot.keys()))\n        if missing:\n            raise PropsNotFoundError(\n                f\"display properties {missing} not found in plot '{out}'\"\n            )\n\n        for prop in props:\n            out.plot.pop(prop)\n\n    def modify(self, path, props=None, unset=None):\n        from dvc_render.vega_templates import get_template\n\n        props = props or {}\n        template = props.get(\"template\")\n        if template:\n            get_template(template, 
self.templates_dir)\n\n        (out,) = self.repo.find_outs_by_path(path)\n        if not out.plot and unset is not None:\n            raise NotAPlotError(out)\n\n        # This out will become a plot unless it is one already\n        if not isinstance(out.plot, dict):\n            out.plot = {}\n\n        if unset:\n            self._unset(out, unset)\n\n        out.plot.update(props)\n\n        # Empty dict will move it to non-plots\n        if not out.plot:\n            out.plot = True\n\n        out.verify_metric()\n        out.stage.dump(update_lock=False)\n\n    @cached_property\n    def templates_dir(self) -> Optional[str]:\n        if self.repo.dvc_dir:\n            return os.path.join(self.repo.dvc_dir, \"plots\")\n        return None\n\n\ndef _is_plot(out: \"Output\") -> bool:\n    return bool(out.plot)\n\n\ndef _resolve_data_sources(plots_data: dict, rev: str, cache: bool = False):\n    from dvc.progress import Tqdm\n\n    values = list(plots_data.values())\n    to_resolve = []\n    while values:\n        value = values.pop()\n        if isinstance(value, dict):\n            if \"data_source\" in value:\n                to_resolve.append(value)\n            values.extend(value.values())\n\n    def resolve(value):\n        data_source = value.pop(\"data_source\")\n        assert callable(data_source)\n        value.update(data_source(cache=cache))\n\n    if not to_resolve:\n        return\n\n    executor = ThreadPoolExecutor(\n        max_workers=min(16, 4 * cpu_count()),\n        thread_name_prefix=\"resolve_data\",\n        cancel_on_error=True,\n    )\n    with executor:\n        iterable = executor.imap_unordered(resolve, to_resolve)\n        with Tqdm(\n            iterable,\n            total=len(to_resolve),\n            desc=f\"Reading plot's data from {rev}\",\n            unit=\"files\",\n            unit_scale=False,\n        ) as progress_iterable:\n            list(progress_iterable)\n\n\ndef _collect_plots(\n    repo: \"Repo\",\n    targets: 
Optional[list[str]] = None,\n    recursive: bool = False,\n) -> dict[str, dict]:\n    from dvc.repo.collect import collect\n\n    plots, fs_paths = collect(\n        repo,\n        output_filter=_is_plot,\n        targets=targets,\n        recursive=recursive,\n    )\n\n    result = {\n        repo.dvcfs.from_os_path(plot.fs_path): _plot_props(plot) for plot in plots\n    }\n    result.update({fs_path: {} for fs_path in fs_paths})\n    return result\n\n\ndef _get_data_targets(definitions: dict):\n    result: set = set()\n    if \"data\" in definitions:\n        for content in definitions[\"data\"].values():\n            if \"data\" in content:\n                for plot_id, config in content[\"data\"].items():\n                    result = result.union(infer_data_sources(plot_id, config))\n    return result\n\n\ndef infer_data_sources(plot_id, config=None):\n    y = config.get(\"y\", None) if config else None\n\n    if isinstance(y, dict):\n        sources = list(y.keys())\n    else:\n        sources = [plot_id]\n\n    x = config.get(\"x\", None) if config else None\n    if isinstance(x, dict):\n        sources.append(first(x.keys()))\n\n    return ldistinct(source for source in sources)\n\n\ndef _matches(targets, config_file, plot_id):\n    import re\n\n    from dvc.utils.plots import get_plot_id\n\n    if not targets:\n        return True\n\n    full_id = get_plot_id(plot_id, config_file)\n    return any(\n        (re.match(target, plot_id) or re.match(target, full_id)) for target in targets\n    )\n\n\ndef _normpath(path):\n    # TODO dvcfs.normopath normalizes to windows path on Windows\n    # even though other methods work as expected\n    import posixpath\n\n    return posixpath.normpath(path)\n\n\ndef _relpath(fs, path):\n    # TODO from_os_path changes abs to relative\n    # TODO we should be using `dvcfile.relpath` - in case of GitFS (plots diff)\n    # and invoking from some subdir `dvcfile.relpath` returns strange long\n    # relative paths\n    # 
(\"../../../../../../dvc.yaml\") - investigate\n    return fs.relpath(fs.join(\"/\", fs.from_os_path(path)), fs.getcwd())\n\n\ndef _collect_output_plots(repo, targets, props, onerror: Optional[Callable] = None):\n    fs = repo.dvcfs\n    result: dict[str, dict] = {}\n    for plot in repo.index.plots:\n        plot_props = _plot_props(plot)\n        dvcfile = plot.stage.dvcfile\n        config_path = _relpath(fs, dvcfile.path)\n        wdir_relpath = _relpath(fs, plot.stage.wdir)\n        if _matches(targets, config_path, str(plot)):\n            unpacked = unpack_if_dir(\n                fs,\n                _normpath(fs.join(wdir_relpath, plot.def_path)),\n                props=plot_props | props,\n                onerror=onerror,\n            )\n\n            dpath.merge(result, {\"\": unpacked})\n    return result\n\n\ndef _id_is_path(plot_props=None):\n    if not plot_props:\n        return True\n\n    y_def = plot_props.get(\"y\")\n    return not isinstance(y_def, dict)\n\n\ndef _adjust_sources(fs, plot_props, config_dir):\n    new_plot_props = deepcopy(plot_props)\n    for axis in [\"x\", \"y\"]:\n        x_is_inferred = axis == \"x\" and (\n            axis not in new_plot_props or isinstance(new_plot_props[axis], str)\n        )\n        if x_is_inferred:\n            continue\n        old = new_plot_props.pop(axis, {})\n        new = {}\n        for filepath, val in old.items():\n            new[_normpath(fs.join(config_dir, filepath))] = val\n        new_plot_props[axis] = new\n    return new_plot_props\n\n\ndef _resolve_definitions(\n    fs: \"FileSystem\",\n    targets: list[str],\n    props: dict[str, Any],\n    config_path: \"StrPath\",\n    definitions: \"DictStrAny\",\n    onerror: Optional[Callable[[Any], Any]] = None,\n):\n    config_path = os.fspath(config_path)\n    config_dir = fs.dirname(config_path)\n    result: dict[str, dict] = {}\n\n    plot_ids_parents = [\n        _normpath(fs.join(config_dir, plot_id)) for plot_id in definitions\n    
]\n    for plot_id, plot_props in definitions.items():\n        if plot_props is None:\n            plot_props = {}\n        if _id_is_path(plot_props):\n            data_path = _normpath(fs.join(config_dir, plot_id))\n            if _matches(targets, config_path, plot_id):\n                unpacked = unpack_if_dir(\n                    fs, data_path, props=plot_props | props, onerror=onerror\n                )\n                # use config for parent directory with most specific definition\n                if unpacked.get(\"data\"):\n                    unpacked[\"data\"] = {\n                        k: v\n                        for k, v in unpacked[\"data\"].items()\n                        if _closest_parent(fs, k, plot_ids_parents) == data_path\n                    }\n                dpath.merge(result, unpacked)\n        elif _matches(targets, config_path, plot_id):\n            adjusted_props = _adjust_sources(fs, plot_props, config_dir)\n            dpath.merge(result, {\"data\": {plot_id: adjusted_props | props}})\n\n    return result\n\n\ndef _closest_parent(fs, path, parents):\n    best_result = \"\"\n    for parent in parents:\n        common_path = fs.commonpath([path, parent])\n        if len(common_path) > len(best_result):\n            best_result = common_path\n    return best_result\n\n\ndef _collect_pipeline_files(repo, targets: list[str], props, onerror=None):\n    result: dict[str, dict] = {}\n    top_plots = repo.index._plots\n    for dvcfile, plots_def in top_plots.items():\n        dvcfile_path = _relpath(repo.dvcfs, dvcfile)\n        dvcfile_defs_dict: dict[str, Union[dict, None]] = {}\n        for elem in plots_def:\n            if isinstance(elem, str):\n                dvcfile_defs_dict[elem] = None\n            else:\n                assert elem\n                k, v = next(iter(elem.items()))\n                dvcfile_defs_dict[k] = v\n\n        resolved = _resolve_definitions(\n            repo.dvcfs, targets, props, dvcfile_path, 
dvcfile_defs_dict, onerror=onerror\n        )\n        dpath.merge(result, {dvcfile_path: resolved})\n    return result\n\n\n@error_handler\ndef _collect_definitions(\n    repo: \"Repo\",\n    targets: list[str],\n    props: Optional[dict] = None,\n    onerror: Optional[Callable] = None,\n    **kwargs,\n) -> dict:\n    result: dict = defaultdict(dict)\n    props = props or {}\n\n    fs = repo.dvcfs\n    dpath.merge(result, _collect_pipeline_files(repo, targets, props, onerror=onerror))\n\n    dpath.merge(result, _collect_output_plots(repo, targets, props, onerror=onerror))\n\n    for target in targets:\n        if not result or fs.exists(target):\n            unpacked = unpack_if_dir(fs, target, props=props, onerror=onerror)\n            dpath.merge(result[\"\"], unpacked)\n\n    return dict(result)\n\n\ndef unpack_if_dir(fs, path, props: dict[str, str], onerror: Optional[Callable] = None):\n    result: dict[str, dict] = defaultdict(dict)\n    if fs.isdir(path):\n        unpacked = _unpack_dir_files(fs, path, onerror=onerror)\n    else:\n        unpacked = {\"data\": [path]}\n\n    if \"data\" in unpacked:\n        for subpath in unpacked[\"data\"]:\n            result[\"data\"].update({subpath: props.copy()})\n    else:\n        result.update(unpacked)\n\n    return dict(result)\n\n\n@error_handler\ndef parse(fs, path, props=None, **fs_kwargs):\n    props = props or {}\n    _, extension = os.path.splitext(path)\n    if extension in SUPPORTED_IMAGE_EXTENSIONS:\n        with fs.open(path, mode=\"rb\", **fs_kwargs) as fd:\n            return fd.read()\n\n    if extension not in PARSERS.keys() | {\".yml\", \".yaml\", \".csv\", \".tsv\"}:\n        raise PlotMetricTypeError(path)\n\n    with reraise(UnicodeDecodeError, EncodingError(path, \"utf8\")):\n        with fs.open(path, mode=\"r\", encoding=\"utf8\", **fs_kwargs) as fd:\n            contents = fd.read()\n\n    if extension in (\".csv\", \".tsv\"):\n        header = props.get(\"header\", True)\n        delim = 
\"\\t\" if extension == \".tsv\" else \",\"\n        return _load_sv(contents, delimiter=delim, header=header)\n    return PARSERS[extension](contents, path)\n\n\ndef _plot_props(out: \"Output\") -> dict:\n    from dvc.schema import PLOT_PROPS\n\n    if not (out.plot):\n        raise NotAPlotError(out)\n    if isinstance(out.plot, list):\n        raise DvcException(\"Multiple plots per data file not supported.\")\n    if isinstance(out.plot, bool):\n        return {}\n\n    return project(out.plot, PLOT_PROPS)\n\n\ndef _load_sv(content, delimiter=\",\", header=True):\n    if header:\n        reader = csv.DictReader(io.StringIO(content), delimiter=delimiter)\n    else:\n        first_row = first(csv.reader(io.StringIO(content)))\n        reader = csv.DictReader(\n            io.StringIO(content),\n            delimiter=delimiter,\n            fieldnames=[str(i) for i in range(len(first_row))],\n        )\n    return list(reader)\n"
  },
  {
    "path": "dvc/repo/plots/diff.py",
    "content": "def _revisions(repo, revs, experiment):\n    revisions = revs or []\n    if experiment and len(revisions) == 1:\n        baseline = repo.experiments.get_baseline(revisions[0])\n        if baseline:\n            revisions.append(baseline[:7])\n    if len(revisions) <= 1:\n        if len(revisions) == 0 and repo.scm.is_dirty(untracked_files=False):\n            revisions.append(\"HEAD\")\n        revisions.append(\"workspace\")\n    return revisions\n\n\ndef diff(repo, *args, revs=None, experiment=False, **kwargs):\n    if repo.scm.no_commits:\n        return {}\n    return repo.plots.show(*args, revs=_revisions(repo, revs, experiment), **kwargs)\n"
  },
  {
    "path": "dvc/repo/pull.py",
    "content": "from dvc.exceptions import CheckoutError\nfrom dvc.log import logger\nfrom dvc.repo import locked\nfrom dvc.utils import glob_targets\n\nlogger = logger.getChild(__name__)\n\n\n@locked\ndef pull(  # noqa: PLR0913\n    self,\n    targets=None,\n    jobs=None,\n    remote=None,\n    all_branches=False,\n    with_deps=False,\n    all_tags=False,\n    force=False,\n    recursive=False,\n    all_commits=False,\n    run_cache=False,\n    glob=False,\n    allow_missing=False,\n):\n    if isinstance(targets, str):\n        targets = [targets]\n\n    expanded_targets = glob_targets(targets, glob=glob)\n\n    processed_files_count = self.fetch(\n        expanded_targets,\n        jobs,\n        remote=remote,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n        with_deps=with_deps,\n        recursive=recursive,\n        run_cache=run_cache,\n    )\n    try:\n        result = self.checkout(\n            targets=expanded_targets,\n            with_deps=with_deps,\n            force=force,\n            recursive=recursive,\n            allow_missing=allow_missing,\n        )\n    except CheckoutError as exc:\n        # put fetched counts first\n        exc.result[\"stats\"] = {\"fetched\": processed_files_count} | exc.result[\"stats\"]\n        raise\n    else:\n        # put fetched counts first\n        result[\"stats\"] = {\"fetched\": processed_files_count} | result[\"stats\"]\n    return result\n"
  },
  {
    "path": "dvc/repo/push.py",
    "content": "from contextlib import suppress\n\nfrom dvc.exceptions import InvalidArgumentError, UploadError\nfrom dvc.log import logger\nfrom dvc.stage.cache import RunCacheNotSupported\nfrom dvc.ui import ui\n\nfrom . import locked\n\nlogger = logger.getChild(__name__)\n\n\ndef _rebuild(idx, path, fs, cb):\n    from dvc_data.index import DataIndex, DataIndexEntry, Meta\n\n    new = DataIndex()\n    items = list(idx.items())\n\n    cb.set_size(len(items))\n    for key, entry in items:\n        if entry.meta and entry.meta.isdir:\n            meta = Meta(isdir=True)\n        else:\n            try:\n                meta = Meta.from_info(fs.info(fs.join(path, *key)), fs.protocol)\n            except FileNotFoundError:\n                meta = None\n\n        if meta:\n            new.add(DataIndexEntry(key=key, meta=meta))\n\n        cb.relative_update(1)\n\n    return new\n\n\ndef _update_meta(index, **kwargs):\n    from dvc.repo.worktree import _merge_push_meta, worktree_view_by_remotes\n\n    stages = set()\n    for remote_name, idx in worktree_view_by_remotes(index, push=True, **kwargs):\n        remote = index.repo.cloud.get_remote(remote_name)\n\n        if not remote.fs.version_aware:\n            continue\n\n        with ui.progress(\n            desc=f\"Collecting {remote.path} on {remote.fs.protocol}\",\n            unit=\"entry\",\n            leave=True,\n        ) as pb:\n            cb = pb.as_callback()\n            new = _rebuild(idx.data[\"repo\"], remote.path, remote.fs, cb)\n\n        for out in idx.outs:\n            _merge_push_meta(out, new, remote.name)\n            stages.add(out.stage)\n\n    for stage in stages:\n        stage.dump(with_files=True, update_pipeline=False)\n\n\n@locked\ndef push(  # noqa: PLR0913\n    self,\n    targets=None,\n    jobs=None,\n    remote=None,\n    all_branches=False,\n    with_deps=False,\n    all_tags=False,\n    recursive=False,\n    all_commits=False,\n    run_cache=False,\n    revs=None,\n    
workspace=True,\n    glob=False,\n):\n    from fsspec.utils import tokenize\n\n    from dvc.config import NoRemoteError\n    from dvc.utils import glob_targets\n    from dvc_data.index.fetch import collect\n    from dvc_data.index.push import push as ipush\n\n    from .fetch import _collect_indexes\n\n    failed_count = 0\n    transferred_count = 0\n\n    with suppress(NoRemoteError):\n        _remote = self.cloud.get_remote(name=remote)\n        if (\n            _remote\n            and (_remote.worktree or _remote.fs.version_aware)\n            and (revs or all_branches or all_tags or all_commits)\n        ):\n            raise InvalidArgumentError(\n                \"Multiple rev push is unsupported for cloud versioned remotes\"\n            )\n\n    try:\n        used_run_cache = self.stage_cache.push(remote) if run_cache else []\n        transferred_count += len(used_run_cache)\n    except RunCacheNotSupported as e:\n        logger.debug(\"failed to push run cache: %s\", e)\n\n    if isinstance(targets, str):\n        targets = [targets]\n\n    indexes = _collect_indexes(\n        self,\n        targets=glob_targets(targets, glob=glob),\n        remote=remote,\n        all_branches=all_branches,\n        with_deps=with_deps,\n        all_tags=all_tags,\n        recursive=recursive,\n        all_commits=all_commits,\n        revs=revs,\n        workspace=workspace,\n        push=True,\n    )\n\n    cache_key = (\n        \"push\",\n        tokenize(sorted(idx.data_tree.hash_info.value for idx in indexes.values())),\n    )\n\n    with ui.progress(desc=\"Collecting\", unit=\"entry\", leave=True) as pb:\n        data = collect(\n            [idx.data[\"repo\"] for idx in indexes.values()],\n            \"remote\",\n            cache_index=self.data_index,\n            cache_key=cache_key,\n            callback=pb.as_callback(),\n            push=True,\n        )\n\n    push_transferred, push_failed = 0, 0\n    try:\n        with ui.progress(\n            
desc=\"Pushing\",\n            bar_format=\"{desc}\",\n            leave=True,\n        ) as pb:\n            push_transferred, push_failed = ipush(\n                data,\n                jobs=jobs,\n                callback=pb.as_callback(),\n            )\n    finally:\n        ws_idx = indexes.get(\"workspace\")\n        if ws_idx is not None:\n            from dvc.repo.index import IndexView\n\n            _index = ws_idx.index if isinstance(ws_idx, IndexView) else ws_idx\n            _update_meta(\n                _index,\n                targets=glob_targets(targets, glob=glob),\n                with_deps=with_deps,\n                recursive=recursive,\n            )\n\n        for fs_index in data:\n            fs_index.close()\n\n        if push_transferred:\n            # NOTE: dropping cached index to force reloading from newly saved\n            # metadata from version-aware remotes\n            self.drop_data_index()\n\n    transferred_count += push_transferred\n    failed_count += push_failed\n    if failed_count:\n        raise UploadError(failed_count)\n\n    return transferred_count\n"
  },
  {
    "path": "dvc/repo/remove.py",
    "content": "import typing\n\nfrom dvc.dvcfile import DVC_FILE_SUFFIX\nfrom dvc.log import logger\nfrom dvc.stage.exceptions import (\n    StageFileDoesNotExistError,\n    StageFileIsNotDvcFileError,\n    StageNotFound,\n)\n\nfrom . import locked\n\nif typing.TYPE_CHECKING:\n    from dvc.repo import Repo\n\nlogger = logger.getChild(__name__)\n\n\n@locked\ndef remove(self: \"Repo\", target: str, outs: bool = False):\n    try:\n        stages = self.stage.from_target(target, accept_group=False)\n    except (StageNotFound, StageFileDoesNotExistError) as e:\n        # If the user specified a tracked file as a target instead of a stage,\n        # e.g. `data.csv` instead of `data.csv.dvc`,\n        # give a more helpful error message.\n        if self.fs.exists(target + DVC_FILE_SUFFIX):\n            raise StageFileIsNotDvcFileError(target) from e\n        raise\n\n    for stage in stages:\n        stage.remove(remove_outs=outs, force=outs)\n\n    return stages\n"
  },
  {
    "path": "dvc/repo/reproduce.py",
    "content": "from collections.abc import Iterable\nfrom typing import TYPE_CHECKING, Callable, NoReturn, Optional, TypeVar, Union, cast\n\nfrom funcy import ldistinct\n\nfrom dvc.exceptions import ReproductionError\nfrom dvc.log import logger\nfrom dvc.repo.scm_context import scm_context\nfrom dvc.stage.cache import RunCacheNotSupported\nfrom dvc.utils import humanize\nfrom dvc.utils.collections import ensure_list\n\nfrom . import locked\n\nif TYPE_CHECKING:\n    from networkx import DiGraph\n\n    from dvc.stage import Stage\n\n    from . import Repo\n\nlogger = logger.getChild(__name__)\nT = TypeVar(\"T\")\n\n\ndef collect_stages(\n    repo: \"Repo\",\n    targets: Iterable[str],\n    recursive: bool = False,\n    glob: bool = False,\n) -> list[\"Stage\"]:\n    stages: list[Stage] = []\n    for target in targets:\n        stages.extend(repo.stage.collect(target, recursive=recursive, glob=glob))\n    return ldistinct(stages)\n\n\ndef get_subgraph(\n    graph: \"DiGraph\",\n    nodes: Optional[list] = None,\n    pipeline: bool = False,\n    downstream: bool = False,\n) -> \"DiGraph\":\n    import networkx as nx\n\n    from .graph import get_pipeline, get_pipelines, get_subgraph_of_nodes\n\n    if not pipeline or not nodes:\n        return get_subgraph_of_nodes(graph, nodes, downstream=downstream)\n\n    pipelines = get_pipelines(graph)\n    used_pipelines = [get_pipeline(pipelines, node) for node in nodes]\n    return nx.compose_all(used_pipelines)\n\n\ndef get_active_graph(graph: \"DiGraph\") -> \"DiGraph\":\n    g = cast(\"DiGraph\", graph.copy())\n    for stage in graph:\n        if stage.frozen:\n            # NOTE: disconnect frozen stage from its dependencies\n            g.remove_edges_from(graph.out_edges(stage))\n    return g\n\n\ndef plan_repro(\n    graph: \"DiGraph\",\n    stages: Optional[list[\"T\"]] = None,\n    pipeline: bool = False,\n    downstream: bool = False,\n) -> list[\"T\"]:\n    r\"\"\"Derive the evaluation of the given node for the 
given graph.\n\n    When you _reproduce a stage_, you want to _evaluate the descendants_\n    to know if it make sense to _recompute_ it. A post-ordered search\n    will give us an order list of the nodes we want.\n\n    For example, let's say that we have the following pipeline:\n\n                               E\n                              / \\\n                             D   F\n                            / \\   \\\n                           B   C   G\n                            \\ /\n                             A\n\n    The derived evaluation of D would be: [A, B, C, D]\n\n    In case that `downstream` option is specified, the desired effect\n    is to derive the evaluation starting from the given stage up to the\n    ancestors. However, the `networkx.ancestors` returns a set, without\n    any guarantee of any order, so we are going to reverse the graph and\n    use a reverse post-ordered search using the given stage as a starting\n    point.\n\n                   E                                   A\n                  / \\                                 / \\\n                 D   F                               B   C   G\n                / \\   \\        --- reverse -->        \\ /   /\n               B   C   G                               D   F\n                \\ /                                     \\ /\n                 A                                       E\n\n    The derived evaluation of _downstream_ B would be: [B, D, E]\n    \"\"\"\n    import networkx as nx\n\n    sub = get_subgraph(graph, stages, pipeline=pipeline, downstream=downstream)\n    return list(nx.dfs_postorder_nodes(sub))\n\n\ndef _reproduce_stage(stage: \"Stage\", **kwargs) -> Optional[\"Stage\"]:\n    if stage.frozen and not stage.is_import:\n        msg = \"%s is frozen. 
Its dependencies are not going to be reproduced.\"\n        logger.warning(msg, stage)\n\n    ret = stage.reproduce(**kwargs)\n    if ret and not kwargs.get(\"dry\", False):\n        stage.dump(update_pipeline=False)\n    return ret\n\n\ndef _get_upstream_downstream_nodes(\n    graph: Optional[\"DiGraph\"], node: T\n) -> tuple[list[T], list[T]]:\n    succ = list(graph.successors(node)) if graph else []\n    pre = list(graph.predecessors(node)) if graph else []\n    return succ, pre\n\n\ndef _repr(stages: Iterable[\"Stage\"]) -> str:\n    return humanize.join(repr(stage.addressing) for stage in stages)\n\n\ndef handle_error(\n    graph: Optional[\"DiGraph\"], on_error: str, exc: Exception, stage: \"Stage\"\n) -> set[\"Stage\"]:\n    import networkx as nx\n\n    logger.warning(\"%s%s\", exc, \" (ignored)\" if on_error == \"ignore\" else \"\")\n    if not graph or on_error == \"ignore\":\n        return set()\n\n    dependents = set(nx.dfs_postorder_nodes(graph.reverse(), stage)) - {stage}\n    if dependents:\n        names = _repr(dependents)\n        msg = \"%s %s will be skipped due to this failure\"\n        logger.warning(msg, \"Stages\" if len(dependents) > 1 else \"Stage\", names)\n    return dependents\n\n\ndef _raise_error(exc: Optional[Exception], *stages: \"Stage\") -> NoReturn:\n    names = _repr(stages)\n    segment = \" stages:\" if len(stages) > 1 else \"\"\n    raise ReproductionError(f\"failed to reproduce{segment} {names}\") from exc\n\n\ndef _reproduce(\n    stages: list[\"Stage\"],\n    graph: Optional[\"DiGraph\"] = None,\n    force_downstream: bool = False,\n    on_error: str = \"fail\",\n    force: bool = False,\n    repro_fn: Callable = _reproduce_stage,\n    **kwargs,\n) -> list[\"Stage\"]:\n    assert on_error in (\"fail\", \"keep-going\", \"ignore\")\n\n    result: list[Stage] = []\n    failed: list[Stage] = []\n    to_skip: dict[Stage, Stage] = {}\n    ret: Optional[Stage] = None\n\n    force_state = dict.fromkeys(stages, force)\n\n    for 
stage in stages:\n        if stage in to_skip:\n            continue\n\n        if ret:\n            logger.info(\"\")  # add a newline\n\n        upstream, downstream = _get_upstream_downstream_nodes(graph, stage)\n        force_stage = force_state[stage]\n\n        try:\n            ret = repro_fn(stage, upstream=upstream, force=force_stage, **kwargs)\n        except Exception as exc:  # noqa: BLE001\n            failed.append(stage)\n            if on_error == \"fail\":\n                _raise_error(exc, stage)\n\n            dependents = handle_error(graph, on_error, exc, stage)\n            to_skip.update(dict.fromkeys(dependents, stage))\n            continue\n\n        if force_downstream and (ret or force_stage):\n            force_state.update(dict.fromkeys(downstream, True))\n\n        if ret:\n            result.append(ret)\n\n    if on_error != \"ignore\" and failed:\n        _raise_error(None, *failed)\n    return result\n\n\n@locked\n@scm_context\ndef reproduce(\n    self: \"Repo\",\n    targets: Union[Iterable[str], str, None] = None,\n    recursive: bool = False,\n    pipeline: bool = False,\n    all_pipelines: bool = False,\n    downstream: bool = False,\n    single_item: bool = False,\n    glob: bool = False,\n    on_error: Optional[str] = \"fail\",\n    **kwargs,\n):\n    from dvc.dvcfile import PROJECT_FILE\n\n    if all_pipelines or pipeline:\n        single_item = False\n        downstream = False\n\n    if not kwargs.get(\"interactive\", False):\n        kwargs[\"interactive\"] = self.config[\"core\"].get(\"interactive\", False)\n\n    stages: list[Stage] = []\n    if not all_pipelines:\n        targets_list = ensure_list(targets or PROJECT_FILE)\n        stages = collect_stages(self, targets_list, recursive=recursive, glob=glob)\n\n    if kwargs.get(\"pull\", False) and kwargs.get(\"run_cache\", True):\n        logger.debug(\"Pulling run cache\")\n        try:\n            self.stage_cache.pull(None)\n        except RunCacheNotSupported as 
e:\n            logger.warning(\"Failed to pull run cache: %s\", e)\n\n    graph = None\n    steps = stages\n    if not single_item:\n        graph = get_active_graph(self.index.graph)\n        steps = plan_repro(graph, stages, pipeline=pipeline, downstream=downstream)\n    return _reproduce(steps, graph=graph, on_error=on_error or \"fail\", **kwargs)\n"
  },
  {
    "path": "dvc/repo/run.py",
    "content": "from typing import TYPE_CHECKING, Union\n\nfrom dvc.utils.cli_parse import parse_params\n\nfrom . import locked\nfrom .scm_context import scm_context\n\nif TYPE_CHECKING:\n    from dvc.stage import PipelineStage, Stage\n\n    from . import Repo\n\n\n@locked\n@scm_context\ndef run(\n    self: \"Repo\",\n    no_exec: bool = False,\n    no_commit: bool = False,\n    run_cache: bool = True,\n    force: bool = True,\n    **kwargs,\n) -> Union[\"Stage\", \"PipelineStage\"]:\n    assert not kwargs.get(\"single_stage\")\n    assert not kwargs.get(\"fname\")\n    kwargs.update({\"force\": force, \"params\": parse_params(kwargs.get(\"params\", []))})\n    stage = self.stage.create(**kwargs)\n\n    if no_exec:\n        stage.ignore_outs()\n    else:\n        stage.run(no_commit=no_commit, run_cache=run_cache)\n\n    stage.dump(update_lock=not no_exec)\n    return stage\n"
  },
  {
    "path": "dvc/repo/scm_context.py",
    "content": "import logging\nimport shlex\nfrom collections.abc import Iterable, Iterator\nfrom contextlib import contextmanager\nfrom functools import wraps\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nfrom dvc.log import logger\nfrom dvc.utils import relpath\nfrom dvc.utils.collections import ensure_list\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n    from dvc.scm import Base\n\n\nlogger = logger.getChild(__name__)\n\n\nclass SCMContext:\n    def __init__(self, scm: \"Base\", config: Optional[dict[str, Any]] = None) -> None:\n        from funcy import get_in\n\n        self.scm: Base = scm\n        self.autostage: bool = get_in(\n            config or {}, [\"core\", \"autostage\"], default=False\n        )\n        self.ignored_paths: list[str] = []\n        self.files_to_track: set[str] = set()\n        self.quiet: bool = False\n\n    def track_file(self, paths: Union[str, Iterable[str], None] = None) -> None:\n        \"\"\"Track file to remind user to track new files or autostage later.\"\"\"\n        return self.files_to_track.update(ensure_list(paths))\n\n    @staticmethod\n    def _make_git_add_cmd(paths: Union[str, Iterable[str]]) -> str:\n        files = \" \".join(map(shlex.quote, ensure_list(paths)))\n        return f\"\\tgit add {files}\"\n\n    def add(self, paths: Union[str, Iterable[str]]) -> None:\n        from scmrepo.exceptions import UnsupportedIndexFormat\n\n        from dvc.scm import add_no_submodules\n\n        try:\n            add_no_submodules(self.scm, paths)\n        except UnsupportedIndexFormat:\n            link = \"https://github.com/treeverse/dvc/issues/610\"\n            add_cmd = self._make_git_add_cmd([relpath(path) for path in paths])\n            logger.info(\"\")\n            msg = (\n                f\"failed to add, add manually using:\\n\\n{add_cmd}\\n\"\n                f\"\\nSee {link} for more details.\\n\"\n            )\n            logger.warning(msg)\n\n    def track_changed_files(self) -> 
None:\n        \"\"\"Stage files that have changed.\"\"\"\n        if not self.files_to_track:\n            return None\n        logger.debug(\"Staging files: %s\", self.files_to_track)\n        return self.add(self.files_to_track)\n\n    def ignore(self, path: str) -> None:\n        from scmrepo.exceptions import FileNotInRepoError\n\n        from dvc.scm import SCMError\n\n        try:\n            gitignore_file = self.scm.ignore(path)\n        except FileNotInRepoError as exc:\n            raise SCMError(str(exc))  # noqa: B904\n\n        if gitignore_file:\n            logger.debug(\"Added '%s' to gitignore file.\", path)\n            self.track_file(relpath(gitignore_file))\n            return self.ignored_paths.append(path)\n        return None\n\n    def ignore_remove(self, path: str) -> None:\n        from scmrepo.exceptions import FileNotInRepoError\n\n        from dvc.scm import SCMError\n\n        logger.debug(\"Removing '%s' from gitignore file.\", path)\n        try:\n            gitignore_file = self.scm.ignore_remove(path)\n        except FileNotInRepoError as exc:\n            raise SCMError(str(exc))  # noqa: B904\n\n        if gitignore_file:\n            return self.track_file(relpath(gitignore_file))\n        return None\n\n    @contextmanager\n    def __call__(\n        self, autostage: Optional[bool] = None, quiet: Optional[bool] = None\n    ) -> Iterator[\"SCMContext\"]:\n        try:\n            yield self\n        except Exception:\n            for path in self.ignored_paths:\n                self.ignore_remove(path)\n            raise\n        finally:\n            self.ignored_paths = []\n\n        if not self.files_to_track:\n            return\n\n        if autostage is None:\n            autostage = self.autostage\n        if quiet is None:\n            quiet = self.quiet\n\n        from dvc.scm import NoSCM\n\n        if autostage:\n            self.track_changed_files()\n        elif (\n            not quiet\n            and not 
isinstance(self.scm, NoSCM)\n            and logger.isEnabledFor(logging.INFO)\n        ):\n            add_cmd = self._make_git_add_cmd(self.files_to_track)\n            logger.info(\"\\nTo track the changes with git, run:\\n\\n%s\", add_cmd)\n            logger.info(\n                \"\\nTo enable auto staging, run:\\n\\n\\tdvc config core.autostage true\"\n            )\n\n        self.files_to_track = set()\n\n    def __enter__(self) -> \"SCMContext\":\n        self._cm = self()\n        return self._cm.__enter__()\n\n    def __exit__(self, *exc_args) -> None:\n        assert self._cm\n        self._cm.__exit__(*exc_args)\n\n\ndef scm_context(method, autostage: Optional[bool] = None, quiet: Optional[bool] = None):\n    @wraps(method)\n    def run(repo: \"Repo\", *args, **kw):\n        with repo.scm_context(autostage=autostage, quiet=quiet):\n            return method(repo, *args, **kw)\n\n    return run\n"
  },
  {
    "path": "dvc/repo/stage.py",
    "content": "import fnmatch\nimport typing\nfrom collections.abc import Iterable\nfrom contextlib import suppress\nfrom functools import wraps\nfrom typing import NamedTuple, Optional, Union\n\nfrom dvc.exceptions import (\n    NoOutputOrStageError,\n    OutputDuplicationError,\n    OutputNotFoundError,\n)\nfrom dvc.log import logger\nfrom dvc.repo import lock_repo\nfrom dvc.ui import ui\nfrom dvc.utils import as_posix, parse_target\n\nlogger = logger.getChild(__name__)\n\nif typing.TYPE_CHECKING:\n    from networkx import DiGraph\n\n    from dvc.repo import Repo\n    from dvc.stage import PipelineStage, Stage\n    from dvc.stage.loader import StageLoader\n\nPROJECT_FILE = \"dvc.yaml\"\n\n\nclass StageInfo(NamedTuple):\n    stage: \"Stage\"\n    filter_info: Optional[str] = None\n\n\nStageList = list[\"Stage\"]\nStageIter = Iterable[\"Stage\"]\nStageSet = set[\"Stage\"]\n\n\ndef _collect_with_deps(stages: StageList, graph: \"DiGraph\") -> StageSet:\n    from dvc.exceptions import StageNotFoundError\n    from dvc.repo.graph import collect_pipeline\n\n    res: StageSet = set()\n    for stage in stages:\n        pl = list(collect_pipeline(stage, graph=graph))\n        if not pl:\n            raise StageNotFoundError(\n                f\"Stage {stage} is not found in the project. 
\"\n                \"Check that there are no symlinks in the parents \"\n                \"leading up to it within the project.\"\n            )\n        res.update(pl)\n    return res\n\n\ndef _maybe_collect_from_dvc_yaml(\n    loader: \"StageLoad\", target, with_deps: bool, **load_kwargs\n) -> StageIter:\n    from dvc.stage.exceptions import StageNotFound\n\n    stages: StageList = []\n    if loader.fs.exists(PROJECT_FILE):\n        with suppress(StageNotFound):\n            stages = loader.load_all(PROJECT_FILE, target, **load_kwargs)\n    if with_deps:\n        return _collect_with_deps(stages, loader.repo.index.graph)\n    return stages\n\n\ndef _collect_specific_target(\n    loader: \"StageLoad\",\n    target: str,\n    with_deps: bool,\n    recursive: bool,\n) -> tuple[StageIter, Optional[str], Optional[str]]:\n    from dvc.dvcfile import is_valid_filename\n\n    # Optimization: do not collect the graph for a specific target\n    file, name = parse_target(target)\n\n    # if the target has a file, we can load directly from it.\n    if not file:\n        # but, if there's no file, parsing is ambiguous as it can be a\n        # stage name in `dvc.yaml` file or an output. We prioritize\n        # `dvc.yaml` stage name here. 
If it exists, then we move on.\n        # else, we assume it's an output name in the `collect_granular()` below\n        msg = \"Checking if stage '%s' is in '%s'\"\n        logger.debug(msg, target, PROJECT_FILE)\n        if not (recursive and loader.fs.isdir(target)):\n            stages = _maybe_collect_from_dvc_yaml(loader, target, with_deps)\n            if stages:\n                return stages, file, name\n    elif not with_deps and is_valid_filename(file):\n        stages = loader.load_all(file, name)\n        return stages, file, name\n    return [], file, name\n\n\ndef locked(f):\n    @wraps(f)\n    def wrapper(loader: \"StageLoad\", *args, **kwargs):\n        with lock_repo(loader.repo):\n            return f(loader, *args, **kwargs)\n\n    return wrapper\n\n\nclass StageLoad:\n    def __init__(self, repo: \"Repo\") -> None:\n        self.repo: Repo = repo\n\n    @property\n    def fs(self):\n        return self.repo.fs\n\n    @locked\n    def add(\n        self,\n        single_stage: bool = False,\n        fname: Optional[str] = None,\n        validate: bool = True,\n        force: bool = False,\n        update_lock: bool = False,\n        **stage_data,\n    ):\n        stage = self.create(\n            single_stage=single_stage,\n            fname=fname,\n            validate=validate,\n            force=force,\n            **stage_data,\n        )\n        stage.dump(update_lock=update_lock)\n        try:\n            stage.ignore_outs()\n        except FileNotFoundError as exc:\n            ui.warn(\n                f\"Could not create .gitignore entry in {exc.filename}.\"\n                \" DVC will attempt to create .gitignore entry again when\"\n                \" the stage is run.\"\n            )\n\n        return stage\n\n    def create(\n        self,\n        single_stage: bool = False,\n        validate: bool = True,\n        fname: Optional[str] = None,\n        force: bool = False,\n        **stage_data,\n    ) -> Union[\"Stage\", 
\"PipelineStage\"]:\n        \"\"\"Creates a stage.\n\n        Args:\n            single_stage: if true, the .dvc file based stage is created,\n                fname is required in that case\n            fname: name of the file to use, not used for dvc.yaml files\n            validate: if true, the new created stage is checked against the\n                stages in the repo. Eg: graph correctness,\n                potential overwrites in dvc.yaml file (unless `force=True`).\n            force: ignores overwrites in dvc.yaml file\n            stage_data: Stage data to create from\n                (see create_stage and loads_from for more information)\n        \"\"\"\n        from dvc.stage import PipelineStage, Stage, create_stage, restore_fields\n        from dvc.stage.exceptions import InvalidStageName\n        from dvc.stage.utils import is_valid_name, prepare_file_path, validate_kwargs\n\n        stage_data = validate_kwargs(\n            single_stage=single_stage, fname=fname, **stage_data\n        )\n        if single_stage:\n            stage_cls = Stage\n            path = fname or prepare_file_path(stage_data)\n        else:\n            path = PROJECT_FILE\n            stage_cls = PipelineStage\n            stage_name = stage_data[\"name\"]\n            if not (stage_name and is_valid_name(stage_name)):\n                raise InvalidStageName\n\n        stage = create_stage(stage_cls, repo=self.repo, path=path, **stage_data)\n        if validate:\n            if not force:\n                from dvc.stage.utils import check_stage_exists\n\n                check_stage_exists(self.repo, stage, stage.path)\n\n            try:\n                self.repo.check_graph(stages={stage})\n            except OutputDuplicationError as exc:\n                # Don't include the stage currently being added.\n                exc.stages.remove(stage)\n                raise OutputDuplicationError(exc.output, exc.stages) from None\n\n        restore_fields(stage)\n        
return stage\n\n    def from_target(\n        self, target: str, accept_group: bool = True, glob: bool = False\n    ) -> StageList:\n        \"\"\"\n        Returns a list of stages from the provided target.\n        (see load_all method below for further details)\n        \"\"\"\n        path, name = parse_target(target, isa_glob=glob)\n        return self.load_all(path=path, name=name, accept_group=accept_group, glob=glob)\n\n    def get_target(self, target: str) -> \"Stage\":\n        \"\"\"\n        Returns a stage from the provided target.\n        (see load_one method for further details)\n        \"\"\"\n        path, name = parse_target(target)\n        return self.load_one(path=path, name=name)\n\n    def _get_filepath(\n        self, path: Optional[str] = None, name: Optional[str] = None\n    ) -> str:\n        if path:\n            return self.repo.fs.abspath(path)\n\n        path = PROJECT_FILE\n        logger.debug(\"Assuming '%s' to be a stage inside '%s'\", name, path)\n        return path\n\n    @staticmethod\n    def _get_group_keys(stages: \"StageLoader\", group: str) -> Iterable[str]:\n        from dvc.parsing import JOIN\n\n        for key in stages:\n            assert isinstance(key, str)\n            if key.startswith(f\"{group}{JOIN}\"):\n                yield key\n\n    def _get_keys(\n        self,\n        stages: \"StageLoader\",\n        name: Optional[str] = None,\n        accept_group: bool = True,\n        glob: bool = False,\n    ) -> Iterable[str]:\n        if not name:\n            return stages.keys()\n        if accept_group and stages.is_foreach_or_matrix_generated(name):\n            return self._get_group_keys(stages, name)\n        if glob:\n            return fnmatch.filter(stages.keys(), name)\n        return [name]\n\n    def load_all(\n        self,\n        path: Optional[str] = None,\n        name: Optional[str] = None,\n        accept_group: bool = True,\n        glob: bool = False,\n    ) -> StageList:\n        \"\"\"Load 
a list of stages from a file.\n\n        Args:\n            path: if not provided, default `dvc.yaml` is assumed.\n            name: required for `dvc.yaml` files, ignored for `.dvc` files.\n            accept_group: if true, all of the stages generated from `name`\n                foreach are returned.\n            glob: if true, `name` is considered as a glob, which is\n                used to filter list of stages from the given `path`.\n        \"\"\"\n        from dvc.dvcfile import load_file\n        from dvc.stage.loader import SingleStageLoader, StageLoader\n\n        path = self._get_filepath(path, name)\n        dvcfile = load_file(self.repo, path)\n        # `dvcfile.stages` is not cached\n        stages = dvcfile.stages  # type: ignore[attr-defined]\n\n        if isinstance(stages, SingleStageLoader):\n            stage = stages[name]\n            return [stage]\n\n        assert isinstance(stages, StageLoader)\n        keys = self._get_keys(stages, name, accept_group, glob)\n        return [stages[key] for key in keys]\n\n    def load_one(\n        self, path: Optional[str] = None, name: Optional[str] = None\n    ) -> \"Stage\":\n        \"\"\"Load a single stage from a file.\n\n        Args:\n            path: if not provided, default `dvc.yaml` is assumed.\n            name: required for `dvc.yaml` files, ignored for `.dvc` files.\n        \"\"\"\n        from dvc.dvcfile import load_file\n\n        path = self._get_filepath(path, name)\n        dvcfile = load_file(self.repo, path)\n        stages = dvcfile.stages  # type: ignore[attr-defined]\n\n        return stages[name]\n\n    def load_file(self, path: Optional[str] = None) -> StageList:\n        \"\"\"Load all of the stages from a file.\"\"\"\n        return self.load_all(path)\n\n    def load_glob(self, path: str, expr: Optional[str] = None):\n        \"\"\"Load stages from `path`, filtered with `expr` provided.\"\"\"\n        return self.load_all(path, expr, glob=True)\n\n    def collect(\n    
    self,\n        target: Optional[str] = None,\n        with_deps: bool = False,\n        recursive: bool = False,\n        graph: Optional[\"DiGraph\"] = None,\n        glob: bool = False,\n    ) -> StageIter:\n        \"\"\"Collect list of stages from the provided target.\n\n        Args:\n            target: if not provided, all of the stages in the graph are\n                returned.\n                Target can be:\n                - a foreach group name or a stage name in the `dvc.yaml` file.\n                - a generated stage name from a foreach group.\n                - a path to `dvc.yaml` or `.dvc` file.\n                - in case of a stage to a dvc.yaml file in a different\n                  directory than current working directory, it can be a path\n                  to dvc.yaml file, followed by a colon `:`, followed by stage\n                  name (eg: `../dvc.yaml:build`).\n                - in case of `recursive`, it can be a path to a directory.\n                - in case of `glob`, it can be a wildcard pattern to match\n                  stages. Example: `build*` for stages in `dvc.yaml` file, or\n                  `../dvc.yaml:build*` for stages in dvc.yaml in a different\n                  directory.\n                  Note that, glob only applies for the stage name, not to the\n                  file, so `**/dvc.yaml:build*` is not possible.\n            with_deps: if true, the stages including their dependencies are\n                returned.\n            recursive: if true and if `target` is a directory, all of the\n                stages inside that directory is returned.\n            graph: graph to use. 
Defaults to `repo.graph`.\n            glob: Use `target` as a pattern to match stages in a file.\n        \"\"\"\n        if not target:\n            return list(graph) if graph else self.repo.index.stages\n\n        if recursive and self.fs.isdir(target):\n            from dvc.repo.graph import collect_inside_path\n\n            path = self.fs.abspath(target)\n            return collect_inside_path(path, graph or self.repo.index.graph)\n\n        stages = self.from_target(target, glob=glob)\n        if not with_deps:\n            return stages\n\n        return _collect_with_deps(stages, graph or self.repo.index.graph)\n\n    def collect_granular(\n        self,\n        target: Optional[str] = None,\n        with_deps: bool = False,\n        recursive: bool = False,\n        graph: Optional[\"DiGraph\"] = None,\n    ) -> list[StageInfo]:\n        \"\"\"Collects a list of (stage, filter_info) from the given target.\n\n        Priority is in the order of following in case of ambiguity:\n        - .dvc file or .yaml file\n        - dir if recursive and directory exists\n        - foreach_group_name or stage_name\n        - generated stage name from a foreach group\n        - output file\n\n        Args:\n            target: if not provided, all of the stages without any filters are\n                returned.\n                If `target` is a path to a dvc-tracked output,\n                a (stage, output_path) is returned.\n                Otherwise, the details above for `target` in `collect()`\n                applies.\n\n            (see `collect()` for other arguments)\n        \"\"\"\n        if not target:\n            return [StageInfo(stage) for stage in self.repo.index.stages]\n\n        target = as_posix(target)\n\n        stages, file, _ = _collect_specific_target(self, target, with_deps, recursive)\n        if not stages:\n            if not (recursive and self.fs.isdir(target)):\n                try:\n                    (out,) = 
self.repo.find_outs_by_path(target, strict=False)\n                    return [StageInfo(out.stage, self.fs.abspath(target))]\n                except OutputNotFoundError:\n                    pass\n\n            from dvc.dvcfile import is_valid_filename\n            from dvc.stage.exceptions import StageFileDoesNotExistError, StageNotFound\n\n            try:\n                stages = self.collect(target, with_deps, recursive, graph)\n            except StageFileDoesNotExistError as exc:\n                # collect() might try to use `target` as a stage name\n                # and throw error that dvc.yaml does not exist, whereas it\n                # should say that both stage name and file does not exist.\n                if file and is_valid_filename(file):\n                    raise\n                raise NoOutputOrStageError(target, exc.file) from exc\n            except StageNotFound as exc:\n                raise NoOutputOrStageError(target, exc.file) from exc\n\n        return [StageInfo(stage) for stage in stages]\n"
  },
  {
    "path": "dvc/repo/status.py",
    "content": "from itertools import chain, compress\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\n\nfrom . import locked\n\nlogger = logger.getChild(__name__)\n\n\ndef _joint_status(pairs, check_updates=True):\n    status_info = {}\n\n    for stage, filter_info in pairs:\n        if stage.frozen and not (stage.is_repo_import or stage.is_versioned_import):\n            logger.warning(\n                (\n                    \"%s is frozen. Its dependencies are\"\n                    \" not going to be shown in the status output.\"\n                ),\n                stage,\n            )\n        status_info.update(\n            stage.status(check_updates=check_updates, filter_info=filter_info)\n        )\n\n    return status_info\n\n\ndef _local_status(\n    self, targets=None, with_deps=False, recursive=False, check_updates=True\n):\n    targets = targets or [None]\n    pairs = chain.from_iterable(\n        self.stage.collect_granular(t, with_deps=with_deps, recursive=recursive)\n        for t in targets\n    )\n\n    return _joint_status(pairs, check_updates=check_updates)\n\n\ndef _cloud_status(\n    self,\n    targets=None,\n    jobs=None,\n    remote=None,\n    all_branches=False,\n    with_deps=False,\n    all_tags=False,\n    recursive=False,\n    all_commits=False,\n):\n    \"\"\"Returns a dictionary with the files that are new or deleted.\n\n    - new: Remote doesn't have the file\n    - deleted: File is no longer in the local cache\n    - missing: File doesn't exist neither in the cache, neither in remote\n\n    Example:\n            Given the following commands:\n\n            $ echo \"foo\" > foo\n            $ echo \"bar\" > bar\n            $ dvc add foo bar\n            $ dvc status -c\n\n            It will return something like:\n\n            { \"foo\": \"new\", \"bar\": \"new\" }\n\n            Now, after pushing and removing \"bar\" from the local cache:\n\n            $ dvc push\n            $ rm 
.dvc/cache/c1/57a79031e1c40f85931829bc5fc552\n\n            The result would be:\n\n            { \"bar\": \"deleted\" }\n    \"\"\"\n    used = self.used_objs(\n        targets,\n        all_branches=all_branches,\n        all_tags=all_tags,\n        all_commits=all_commits,\n        with_deps=with_deps,\n        force=True,\n        remote=remote,\n        jobs=jobs,\n        recursive=recursive,\n        push=True,\n    )\n\n    ret = {}\n    for odb, obj_ids in used.items():\n        if odb is not None:\n            # ignore imported objects\n            continue\n        status_info = self.cloud.status(obj_ids, jobs, remote=remote)\n        for status_ in (\"deleted\", \"new\", \"missing\"):\n            for hash_info in getattr(status_info, status_, []):\n                ret[hash_info.obj_name] = status_\n\n    return ret\n\n\n@locked\ndef status(  # noqa: PLR0913\n    self,\n    targets=None,\n    jobs=None,\n    cloud=False,\n    remote=None,\n    all_branches=False,\n    with_deps=False,\n    all_tags=False,\n    all_commits=False,\n    recursive=False,\n    check_updates=True,\n):\n    if isinstance(targets, str):\n        targets = [targets]\n\n    if cloud or remote:\n        return _cloud_status(\n            self,\n            targets,\n            jobs,\n            all_branches=all_branches,\n            with_deps=with_deps,\n            remote=remote,\n            all_tags=all_tags,\n            all_commits=all_commits,\n            recursive=recursive,\n        )\n\n    ignored = list(\n        compress(\n            [\"--all-branches\", \"--all-tags\", \"--all-commits\", \"--jobs\"],\n            [all_branches, all_tags, all_commits, jobs],\n        )\n    )\n    if ignored:\n        msg = \"The following options are meaningless for local status: {}\"\n        raise InvalidArgumentError(msg.format(\", \".join(ignored)))\n\n    return _local_status(\n        self,\n        targets,\n        with_deps=with_deps,\n        recursive=recursive,\n      
  check_updates=check_updates,\n    )\n"
  },
  {
    "path": "dvc/repo/trie.py",
    "content": "from funcy import first\nfrom pygtrie import Trie\n\nfrom dvc.exceptions import OutputDuplicationError, OverlappingOutputPathsError\n\n\ndef build_outs_trie(stages):\n    outs = Trie()\n\n    for stage in stages:\n        for out in stage.outs:\n            out_key = out.fs.parts(out.fs_path)\n\n            # Check for dup outs\n            if out_key in outs:\n                dup_stages = [stage, outs[out_key].stage]\n                raise OutputDuplicationError(str(out), set(dup_stages))\n\n            # Check for overlapping outs\n            if outs.has_subtrie(out_key):\n                parent = out\n                overlapping = first(outs.values(prefix=out_key))\n            else:\n                parent = outs.shortest_prefix(out_key).value\n                overlapping = out\n            if parent and overlapping:\n                msg = (\n                    f\"The output paths:\\n'{parent!s}'('{parent.stage.addressing}')\\n\"\n                    f\"'{overlapping!s}'('{overlapping.stage.addressing}')\\n\"\n                    \"overlap and are thus in the same tracked directory.\\n\"\n                    \"To keep reproducibility, outputs should be in separate \"\n                    \"tracked directories or tracked individually.\"\n                )\n                raise OverlappingOutputPathsError(parent, overlapping, msg)\n\n            outs[out_key] = out\n\n    return outs\n"
  },
  {
    "path": "dvc/repo/update.py",
    "content": "from typing import TYPE_CHECKING\n\nfrom dvc.exceptions import InvalidArgumentError\n\nfrom . import locked\n\nif TYPE_CHECKING:\n    from dvc.repo.stage import StageInfo\n\n\n@locked\ndef update(  # noqa: C901\n    self,\n    targets=None,\n    rev=None,\n    recursive=False,\n    to_remote=False,\n    no_download=False,\n    remote=None,\n    jobs=None,\n):\n    from .worktree import update_worktree_stages\n\n    if not targets:\n        targets = [None]\n\n    if isinstance(targets, str):\n        targets = [targets]\n\n    if to_remote and no_download:\n        raise InvalidArgumentError(\"--to-remote can't be used with --no-download\")\n\n    if not to_remote and remote:\n        raise InvalidArgumentError(\"--remote can't be used without --to-remote\")\n\n    import_stages = set()\n    other_stage_infos: list[StageInfo] = []\n\n    for stage_info in self.index.collect_targets(targets, recursive=recursive):\n        if stage_info.stage.is_import:\n            import_stages.add(stage_info.stage)\n        else:\n            other_stage_infos.append(stage_info)\n\n    for stage in import_stages:\n        stage.update(\n            rev,\n            to_remote=to_remote,\n            remote=remote,\n            no_download=no_download,\n            jobs=jobs,\n        )\n        stage.dump()\n\n    if other_stage_infos:\n        if rev:\n            raise InvalidArgumentError(\"--rev can't be used with worktree update\")\n        if no_download:\n            raise InvalidArgumentError(\n                \"--no-download can't be used with worktree update\"\n            )\n        if to_remote:\n            raise InvalidArgumentError(\"--to-remote can't be used with worktree update\")\n        update_worktree_stages(self, other_stage_infos)\n\n    stages = import_stages | {stage_info.stage for stage_info in other_stage_infos}\n    return list(stages)\n"
  },
  {
    "path": "dvc/repo/worktree.py",
    "content": "from collections.abc import Iterable\nfrom functools import partial\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nfrom funcy import first\n\nfrom dvc.log import logger\nfrom dvc.stage.exceptions import StageUpdateError\n\nif TYPE_CHECKING:\n    from dvc.data_cloud import Remote\n    from dvc.output import Output\n    from dvc.repo import Repo\n    from dvc.repo.index import Index, IndexView\n    from dvc.repo.stage import StageInfo\n    from dvc.stage import Stage\n    from dvc.types import TargetType\n    from dvc_data.hashfile.meta import Meta\n    from dvc_data.index import DataIndex, DataIndexView\n    from dvc_objects.fs.base import FileSystem\n\nlogger = logger.getChild(__name__)\n\n\n# for files, if our version's checksum (etag) matches the latest remote\n# checksum, we do not need to push, even if the version IDs don't match\ndef _meta_checksum(fs: \"FileSystem\", meta: \"Meta\") -> Any:\n    if not meta or meta.isdir:\n        return meta\n    assert fs.PARAM_CHECKSUM\n    return getattr(meta, fs.PARAM_CHECKSUM)\n\n\ndef worktree_view_by_remotes(\n    index: \"Index\",\n    targets: Optional[\"TargetType\"] = None,\n    push: bool = False,\n    **kwargs: Any,\n) -> Iterable[tuple[Optional[str], \"IndexView\"]]:\n    from dvc.repo.index import IndexView\n\n    def outs_filter(view: \"IndexView\", remote: Optional[str]):\n        def _filter(out: \"Output\") -> bool:\n            if out.remote != remote:\n                return False\n            if view._outs_filter:\n                return view._outs_filter(out)\n            return True\n\n        return _filter\n\n    view = worktree_view(index, targets=targets, push=push, **kwargs)\n    remotes = {out.remote for out in view.outs}\n\n    if len(remotes) <= 1:\n        yield first(remotes), view\n        return\n\n    for remote in remotes:\n        yield remote, IndexView(index, view._stage_infos, outs_filter(view, remote))\n\n\ndef worktree_view(\n    index: \"Index\",\n    
targets: Optional[\"TargetType\"] = None,\n    push: bool = False,\n    **kwargs: Any,\n) -> \"IndexView\":\n    \"\"\"Return view of data that can be stored in worktree remotes.\n\n    Args:\n        targets: Optional targets.\n        push: Whether the view should be restricted to pushable data only.\n\n    Additional kwargs will be passed into target collection.\n    \"\"\"\n\n    def stage_filter(stage: \"Stage\") -> bool:\n        return not (push and stage.is_repo_import)\n\n    def outs_filter(out: \"Output\") -> bool:\n        return out.is_in_repo and out.use_cache and (not push or out.can_push)\n\n    return index.targets_view(\n        targets,\n        stage_filter=stage_filter,\n        outs_filter=outs_filter,\n        **kwargs,\n    )\n\n\ndef _get_remote(\n    repo: \"Repo\", name: Optional[str], default: \"Remote\", command: str\n) -> \"Remote\":\n    if name in (None, default.name):\n        return default\n    return repo.cloud.get_remote(name, command)\n\n\ndef _merge_push_meta(  # noqa: C901\n    out: \"Output\",\n    index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote: Optional[str] = None,\n):\n    \"\"\"Merge existing output meta with newly pushed meta.\n\n    Existing version IDs for unchanged files will be preserved to reduce merge\n    conflicts (i.e. 
the DVC output's version ID may not match the pushed/latest\n    version ID as long when the file content of both versions is the same).\n    \"\"\"\n    from dvc_data.hashfile.tree import Tree\n    from dvc_data.index.save import build_tree\n\n    _, key = out.index_key\n\n    entry = index.get(key)\n    if entry is None:\n        return\n\n    repo = out.stage.repo\n    if out.isdir():\n        old_tree = out.get_obj()\n        assert isinstance(old_tree, Tree)\n        entry.hash_info = old_tree.hash_info\n        entry.meta = out.meta\n        entries = [entry]\n        for subkey, entry in index.iteritems(key):\n            entries.append(entry)\n            if entry.meta is not None and entry.meta.isdir:\n                continue\n            fs_path = repo.fs.join(repo.root_dir, *subkey)\n            meta, hash_info = old_tree.get(repo.fs.relparts(fs_path, out.fs_path)) or (\n                None,\n                None,\n            )\n            entry.hash_info = hash_info\n            if entry.meta:\n                entry.meta.remote = remote\n            if meta is not None and meta.version_id is not None:\n                # preserve existing version IDs for unchanged files in\n                # this dir (entry will have the latest remote version\n                # ID after checkout)\n                entry.meta = meta\n\n        for entry in entries:\n            index.add(entry)\n\n        tree_meta, new_tree = build_tree(index, key)\n        out.obj = new_tree\n        out.hash_info = new_tree.hash_info\n        out.meta = tree_meta\n    else:\n        if entry.hash_info:\n            out.hash_info = entry.hash_info\n        if out.meta is None or out.meta.version_id is None:\n            out.meta = entry.meta\n    if out.meta:\n        out.meta.remote = remote\n\n\ndef update_worktree_stages(repo: \"Repo\", stage_infos: Iterable[\"StageInfo\"]):\n    from dvc.repo.index import IndexView\n\n    def outs_filter(out: \"Output\") -> bool:\n        return 
out.is_in_repo and out.use_cache and out.can_push\n\n    view = IndexView(repo.index, stage_infos, outs_filter=outs_filter)\n    local_index = view.data[\"repo\"]\n    remote_indexes: dict[str, tuple[Remote, DataIndex]] = {}\n    for stage in view.stages:\n        for out in stage.outs:\n            _update_worktree_out(repo, out, local_index, remote_indexes)\n        stage.dump(with_files=True, update_pipeline=False)\n\n\ndef _update_worktree_out(\n    repo: \"Repo\",\n    out: \"Output\",\n    local_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote_indexes: dict[str, tuple[\"Remote\", \"DataIndex\"]],\n):\n    from dvc_data.index import build\n\n    remote_name = out.remote or (out.meta.remote if out.meta is not None else None)\n    if not remote_name:\n        logger.warning(\"Could not update '%s', it was never pushed to a remote\", out)\n        return\n\n    if remote_name in remote_indexes:\n        remote, remote_index = remote_indexes[remote_name]\n    else:\n        remote = repo.cloud.get_remote(remote_name, \"update\")\n        if not remote.worktree:\n            raise StageUpdateError(out.stage.relpath)\n        logger.debug(\"indexing latest worktree for '%s'\", remote.path)\n        remote_index = build(remote.path, remote.fs)\n        remote_indexes[remote_name] = remote, remote_index\n    _workspace, key = out.index_key\n    if key not in remote_index:\n        logger.warning(\"Could not update '%s', it does not exist in the remote\", out)\n        return\n\n    entry = remote_index[key]\n    if (\n        entry.meta\n        and entry.meta.isdir\n        and not any(\n            subkey != key and subentry.meta and not subentry.meta.isdir\n            for subkey, subentry in remote_index.iteritems(key)\n        )\n    ):\n        logger.warning(\"Could not update '%s', directory is empty in the remote\", out)\n        return\n\n    _fetch_out_changes(repo, out, local_index, remote_index, remote)\n    _update_out_meta(repo, out, 
local_index, remote_index, remote)\n\n\ndef _fetch_out_changes(\n    repo: \"Repo\",\n    out: \"Output\",\n    local_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote: \"Remote\",\n):\n    from dvc.fs.callbacks import TqdmCallback\n    from dvc_data.index.checkout import apply, compare\n\n    old, new = _get_diff_indexes(out, local_index, remote_index)\n\n    with TqdmCallback(unit=\"entry\", desc=\"Comparing indexes\") as cb:\n        diff = compare(\n            old,\n            new,\n            delete=True,\n            meta_only=True,\n            meta_cmp_key=partial(_meta_checksum, remote.fs),\n            callback=cb,\n        )\n\n    total = len(new)\n    with TqdmCallback(unit=\"file\", desc=f\"Updating '{out}'\", disable=total == 0) as cb:\n        cb.set_size(total)\n        apply(\n            diff,\n            repo.root_dir,\n            out.fs,\n            update_meta=False,\n            storage=\"data\",\n            callback=cb,\n        )\n        out.save()\n\n\ndef _get_diff_indexes(\n    out: \"Output\",\n    local_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote_index: Union[\"DataIndex\", \"DataIndexView\"],\n) -> tuple[\"DataIndex\", \"DataIndex\"]:\n    from dvc_data.index import DataIndex\n\n    _, key = out.index_key\n    old = DataIndex()\n    new = DataIndex()\n    for _, entry in local_index.iteritems(key):\n        old.add(entry)\n    for _, entry in remote_index.iteritems(key):\n        new.add(entry)\n\n    for prefix, storage in local_index.storage_map.items():\n        old.storage_map[prefix] = storage\n\n    for prefix, storage in remote_index.storage_map.items():\n        new.storage_map[prefix] = storage\n\n    return old, new\n\n\ndef _update_out_meta(\n    repo: \"Repo\",\n    out: \"Output\",\n    local_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote: \"Remote\",\n):\n  
  from dvc_data.index.save import build_tree\n\n    index = _get_update_diff_index(repo, out, local_index, remote_index, remote)\n\n    _, key = out.index_key\n    entry = index[key]\n    if out.isdir():\n        tree_meta, new_tree = build_tree(index, key)\n        out.obj = new_tree\n        out.hash_info = new_tree.hash_info\n        out.meta = tree_meta\n    else:\n        if entry.hash_info:\n            out.hash_info = entry.hash_info\n        out.meta = entry.meta\n    if out.meta:\n        out.meta.remote = remote.name\n\n\ndef _get_update_diff_index(\n    repo: \"Repo\",\n    out: \"Output\",\n    local_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote_index: Union[\"DataIndex\", \"DataIndexView\"],\n    remote: \"Remote\",\n) -> \"DataIndex\":\n    from dvc_data.hashfile.tree import Tree\n    from dvc_data.index import DataIndex\n    from dvc_data.index.diff import ADD, MODIFY, UNCHANGED, diff\n\n    old, new = _get_diff_indexes(out, local_index, remote_index)\n    index = DataIndex()\n    for change in diff(\n        old,\n        new,\n        meta_only=True,\n        meta_cmp_key=partial(_meta_checksum, remote.fs),\n        with_unchanged=True,\n    ):\n        if change.typ in (ADD, MODIFY):\n            entry = change.new\n            # preserve md5's which were calculated in out.save() after\n            # downloading\n            if out.isdir():\n                if not entry.meta.isdir:\n                    fs_path = repo.fs.join(repo.root_dir, *entry.key)\n                    tree = out.obj\n                    assert isinstance(tree, Tree)\n                    _, entry.hash_info = tree.get(  # type: ignore[misc]\n                        repo.fs.relparts(fs_path, out.fs_path)\n                    )\n            else:\n                entry.hash_info = out.hash_info\n            index[change.new.key] = change.new\n        elif change.typ == UNCHANGED:\n            index[change.old.key] = change.old\n    return index\n"
  },
  {
    "path": "dvc/rwlock.py",
    "content": "import json\nimport os\nfrom collections import defaultdict\nfrom contextlib import contextmanager\n\nimport psutil\nfrom voluptuous import Invalid, Optional, Required, Schema\n\nfrom dvc.log import logger\n\nfrom .exceptions import DvcException\nfrom .fs import localfs\nfrom .lock import make_lock\nfrom .utils import relpath\n\nlogger = logger.getChild(__name__)\n\n\nINFO_SCHEMA = {Required(\"pid\"): int, Required(\"cmd\"): str}\n\nSCHEMA = Schema(\n    {\n        Optional(\"write\", default={}): {str: INFO_SCHEMA},\n        Optional(\"read\", default={}): {str: [INFO_SCHEMA]},\n    }\n)\n\nRWLOCK_FILE = \"rwlock\"\nRWLOCK_LOCK = \"rwlock.lock\"\n\n\nclass RWLockFileCorruptedError(DvcException):\n    def __init__(self, path):\n        super().__init__(\n            f\"Unable to read RWLock-file {relpath(path)!r}. JSON structure is corrupted\"\n        )\n\n\nclass RWLockFileFormatError(DvcException):\n    def __init__(self, path):\n        super().__init__(f\"RWLock-file {relpath(path)!r} format error.\")\n\n\n@contextmanager\ndef _edit_rwlock(lock_dir, fs, hardlink):\n    path = fs.join(lock_dir, RWLOCK_FILE)\n\n    rwlock_guard = make_lock(\n        fs.join(lock_dir, RWLOCK_LOCK),\n        tmp_dir=lock_dir,\n        hardlink_lock=hardlink,\n    )\n    with rwlock_guard:\n        try:\n            with fs.open(path, encoding=\"utf-8\") as fobj:\n                lock = SCHEMA(json.load(fobj))\n        except FileNotFoundError:\n            lock = SCHEMA({})\n        except json.JSONDecodeError as exc:\n            raise RWLockFileCorruptedError(path) from exc\n        except Invalid as exc:\n            raise RWLockFileFormatError(path) from exc\n        lock[\"read\"] = defaultdict(list, lock[\"read\"])\n        lock[\"write\"] = defaultdict(dict, lock[\"write\"])\n        yield lock\n        with fs.open(path, \"w\", encoding=\"utf-8\") as fobj:\n            json.dump(lock, fobj)\n\n\ndef _infos_to_str(infos):\n    return \"\\n\".join(\n        
\"  (PID {}): {}\".format(info[\"pid\"], info[\"cmd\"]) for info in infos\n    )\n\n\ndef _check_blockers(tmp_dir, lock, info, *, mode, waiters):  # noqa: C901, PLR0912\n    from .lock import LockError\n\n    non_existing_pid = set()\n\n    blockers = []\n    to_release = defaultdict(list)\n    for path, infos in lock[mode].items():\n        for waiter_path in waiters:\n            if localfs.overlaps(waiter_path, path):\n                break\n        else:\n            continue\n\n        infos = infos if isinstance(infos, list) else [infos]\n        for blocker in infos:\n            if blocker == info:\n                continue\n\n            pid = int(blocker[\"pid\"])\n\n            if pid in non_existing_pid:\n                pass\n            elif psutil.pid_exists(pid):\n                blockers.append(blocker)\n                continue\n            else:\n                non_existing_pid.add(pid)\n                cmd = blocker[\"cmd\"]\n                logger.warning(\n                    (\n                        \"Process '%s' with (Pid %s), in RWLock-file '%s'\"\n                        \" had been killed. 
Auto removed it from the lock file.\"\n                    ),\n                    cmd,\n                    pid,\n                    relpath(path),\n                )\n            to_release[json.dumps(blocker, sort_keys=True)].append(path)\n\n    if to_release:\n        for info_json, path_list in to_release.items():\n            info = json.loads(info_json)\n            if mode == \"read\":\n                _release_read(lock, info, path_list)\n            elif mode == \"write\":\n                _release_write(lock, info, path_list)\n\n    if blockers:\n        raise LockError(\n            f\"'{waiter_path}' is busy, it is being blocked by:\\n\"\n            f\"{_infos_to_str(blockers)}\\n\"\n            \"\\n\"\n            \"If there are no processes with such PIDs, you can manually \"\n            f\"remove '{tmp_dir}/rwlock' and try again.\"\n        )\n\n\ndef _acquire_read(lock, info, paths):\n    changes = []\n\n    lock[\"read\"] = lock.get(\"read\", defaultdict(list))\n\n    for path in paths:\n        readers = lock[\"read\"][path]\n        if info in readers:\n            continue\n\n        changes.append(path)\n        readers.append(info)\n\n    return changes\n\n\ndef _acquire_write(lock, info, paths):\n    changes = []\n\n    lock[\"write\"] = lock.get(\"write\", defaultdict(dict))\n\n    for path in paths:\n        if lock[\"write\"][path] == info:\n            continue\n\n        changes.append(path)\n        lock[\"write\"][path] = info\n\n    return changes\n\n\ndef _release_write(lock, info, changes):\n    for url in changes:\n        assert \"write\" in lock\n        assert url in lock[\"write\"]\n        assert lock[\"write\"][url] == info\n        del lock[\"write\"][url]\n        if not lock[\"write\"]:\n            del lock[\"write\"]\n\n\ndef _release_read(lock, info, changes):\n    for url in changes:\n        assert \"read\" in lock\n        assert url in lock[\"read\"]\n        assert info in lock[\"read\"][url]\n        
lock[\"read\"][url].remove(info)\n        if not lock[\"read\"][url]:\n            del lock[\"read\"][url]\n        if not lock[\"read\"]:\n            del lock[\"read\"]\n\n\n@contextmanager\ndef rwlock(tmp_dir, fs, cmd, read, write, hardlink):\n    \"\"\"Create non-thread-safe RWLock for file paths.\n\n    Args:\n        tmp_dir (str): existing directory in which to create the rwlock file.\n        fs (FileSystem): fs instance that tmp_dir belongs to.\n        cmd (str): command that will be working on these file paths.\n        read ([str]): file paths that are going to be read.\n        write ([str]): file paths that are going to be written.\n        hardlink (bool): use hardlink lock to guard the rwlock file while editing it.\n\n    Raises:\n        LockError: raised if file paths we want to read are being written to by\n            another command or if file paths we want to write are being written\n            to or read from by another command.\n        RWLockFileCorruptedError: raised if rwlock file is not valid JSON.\n        RWLockFileFormatError: raised if rwlock file is valid JSON, but\n            has an internal format that doesn't pass our schema validation.\n    \"\"\"\n    info = {\"pid\": os.getpid(), \"cmd\": cmd}\n\n    with _edit_rwlock(tmp_dir, fs, hardlink) as lock:\n        _check_blockers(tmp_dir, lock, info, mode=\"write\", waiters=read + write)\n        _check_blockers(tmp_dir, lock, info, mode=\"read\", waiters=write)\n\n        rchanges = _acquire_read(lock, info, read)\n        wchanges = _acquire_write(lock, info, write)\n\n    try:\n        yield\n    finally:\n        with _edit_rwlock(tmp_dir, fs, hardlink) as lock:\n            _release_write(lock, info, wchanges)\n            _release_read(lock, info, rchanges)\n"
  },
  {
    "path": "dvc/schema.py",
    "content": "from collections.abc import Mapping\nfrom typing import Any\n\nimport voluptuous as vol\n\nfrom dvc import dependency, output\nfrom dvc.annotations import ANNOTATION_SCHEMA, ARTIFACT_SCHEMA\nfrom dvc.output import (\n    CHECKSUMS_SCHEMA,\n    CLOUD_SCHEMA,\n    DIR_FILES_SCHEMA,\n    META_SCHEMA,\n    Output,\n)\nfrom dvc.parsing import DO_KWD, FOREACH_KWD, MATRIX_KWD, VARS_KWD\nfrom dvc.stage.params import StageParams\n\nSTAGES = \"stages\"\nSINGLE_STAGE_SCHEMA = {\n    StageParams.PARAM_MD5: output.CHECKSUM_SCHEMA,\n    StageParams.PARAM_WDIR: vol.Any(str, None),\n    StageParams.PARAM_DEPS: vol.Any([dependency.SCHEMA], None),\n    StageParams.PARAM_OUTS: vol.Any([output.SCHEMA], None),\n    StageParams.PARAM_LOCKED: bool,  # backward compatibility\n    StageParams.PARAM_FROZEN: bool,\n    StageParams.PARAM_META: object,\n    StageParams.PARAM_ALWAYS_CHANGED: bool,\n    StageParams.PARAM_DESC: str,\n}\n\nDATA_SCHEMA: dict[Any, Any] = {\n    **CHECKSUMS_SCHEMA,\n    **META_SCHEMA,\n    vol.Required(\"path\"): str,\n    Output.PARAM_CLOUD: CLOUD_SCHEMA,\n    Output.PARAM_FILES: [DIR_FILES_SCHEMA],\n    Output.PARAM_HASH: str,\n    **dependency.DatasetDependency.DATASET_SCHEMA,\n}\nLOCK_FILE_STAGE_SCHEMA = {\n    vol.Required(StageParams.PARAM_CMD): vol.Any(str, list),\n    StageParams.PARAM_DEPS: [DATA_SCHEMA],\n    StageParams.PARAM_PARAMS: {str: {str: object}},\n    StageParams.PARAM_OUTS: [DATA_SCHEMA],\n}\n\nLOCKFILE_STAGES_SCHEMA = {str: LOCK_FILE_STAGE_SCHEMA}\nLOCKFILE_SCHEMA = {\n    vol.Required(\"schema\"): vol.Equal(\"2.0\", \"invalid schema version\"),\n    \"datasets\": object,\n    STAGES: LOCKFILE_STAGES_SCHEMA,\n}\n\nOUT_PSTAGE_DETAILED_SCHEMA = {\n    str: {\n        **ANNOTATION_SCHEMA,  # type: ignore[arg-type]\n        Output.PARAM_CACHE: bool,\n        Output.PARAM_PERSIST: bool,\n        \"checkpoint\": bool,\n        Output.PARAM_REMOTE: str,\n        Output.PARAM_PUSH: bool,\n    }\n}\n\nPLOTS = \"plots\"\nPLOT_PROPS = {\n   
 Output.PARAM_PLOT_TEMPLATE: str,\n    Output.PARAM_PLOT_X: str,\n    Output.PARAM_PLOT_Y: str,\n    Output.PARAM_PLOT_X_LABEL: str,\n    Output.PARAM_PLOT_Y_LABEL: str,\n    Output.PARAM_PLOT_TITLE: str,\n    Output.PARAM_PLOT_HEADER: bool,\n}\nPLOT_PROPS_SCHEMA = OUT_PSTAGE_DETAILED_SCHEMA[str] | PLOT_PROPS\nPLOT_PSTAGE_SCHEMA = {str: vol.Any(PLOT_PROPS_SCHEMA, [PLOT_PROPS_SCHEMA])}\n\nPARAM_PSTAGE_NON_DEFAULT_SCHEMA = {str: [str]}\n\nVARS_SCHEMA = [str, dict]\n\nSTAGE_DEFINITION = {\n    MATRIX_KWD: {str: vol.Any(str, list)},\n    vol.Required(StageParams.PARAM_CMD): vol.Any(str, list),\n    vol.Optional(StageParams.PARAM_WDIR): str,\n    vol.Optional(StageParams.PARAM_DEPS): [str],\n    vol.Optional(StageParams.PARAM_PARAMS): [vol.Any(str, dict)],\n    vol.Optional(VARS_KWD): VARS_SCHEMA,\n    vol.Optional(StageParams.PARAM_FROZEN): bool,\n    vol.Optional(StageParams.PARAM_META): object,\n    vol.Optional(StageParams.PARAM_DESC): str,\n    vol.Optional(StageParams.PARAM_ALWAYS_CHANGED): bool,\n    vol.Optional(StageParams.PARAM_OUTS): [vol.Any(str, OUT_PSTAGE_DETAILED_SCHEMA)],\n    vol.Optional(StageParams.PARAM_METRICS): [vol.Any(str, OUT_PSTAGE_DETAILED_SCHEMA)],\n    vol.Optional(StageParams.PARAM_PLOTS): [vol.Any(str, PLOT_PSTAGE_SCHEMA)],\n}\n\n\ndef either_or(primary, fallback, fallback_includes=None):\n    def validator(data):\n        schema = primary\n        if isinstance(data, Mapping) and set(fallback_includes or []) & data.keys():\n            schema = fallback\n        return vol.Schema(schema)(data)\n\n    return validator\n\n\nPLOT_DEFINITION = {\n    Output.PARAM_PLOT_X: vol.Any(str, {str: str}),\n    Output.PARAM_PLOT_Y: vol.Any(str, [str], {str: vol.Any(str, [str])}),\n    Output.PARAM_PLOT_X_LABEL: str,\n    Output.PARAM_PLOT_Y_LABEL: str,\n    Output.PARAM_PLOT_TITLE: str,\n    Output.PARAM_PLOT_TEMPLATE: str,\n}\nSINGLE_PLOT_SCHEMA = {vol.Required(str): vol.Any(PLOT_DEFINITION, None)}\nARTIFACTS = \"artifacts\"\nSINGLE_ARTIFACT_SCHEMA = 
vol.Schema({str: ARTIFACT_SCHEMA})\nFOREACH_IN = {\n    vol.Required(FOREACH_KWD): vol.Any(dict, list, str),\n    vol.Required(DO_KWD): STAGE_DEFINITION,\n}\nSINGLE_PIPELINE_STAGE_SCHEMA = {\n    str: either_or(STAGE_DEFINITION, FOREACH_IN, [FOREACH_KWD, DO_KWD])\n}\n\nDATASET_SCHEMA = vol.Schema(\n    {vol.Required(\"type\"): str, vol.Required(\"name\"): str}, extra=vol.ALLOW_EXTRA\n)\nMULTI_STAGE_SCHEMA = {\n    \"datasets\": object,\n    PLOTS: [vol.Any(str, SINGLE_PLOT_SCHEMA)],\n    STAGES: SINGLE_PIPELINE_STAGE_SCHEMA,\n    VARS_KWD: VARS_SCHEMA,\n    StageParams.PARAM_PARAMS: [str],\n    StageParams.PARAM_METRICS: [str],\n    ARTIFACTS: SINGLE_ARTIFACT_SCHEMA,\n}\n\nCOMPILED_SINGLE_STAGE_SCHEMA = vol.Schema(SINGLE_STAGE_SCHEMA)\nCOMPILED_MULTI_STAGE_SCHEMA = vol.Schema(MULTI_STAGE_SCHEMA)\nCOMPILED_LOCK_FILE_STAGE_SCHEMA = vol.Schema(LOCK_FILE_STAGE_SCHEMA)\nCOMPILED_LOCKFILE_SCHEMA = vol.Schema(LOCKFILE_SCHEMA)\n"
  },
  {
    "path": "dvc/scm.py",
    "content": "\"\"\"Manages source control systems (e.g. Git).\"\"\"\n\nimport os\nfrom collections.abc import Iterable, Iterator, Mapping\nfrom contextlib import contextmanager\nfrom functools import partial\nfrom typing import TYPE_CHECKING, Literal, Optional, Union, overload\n\nfrom funcy import group_by\nfrom scmrepo.base import Base  # noqa: TC002\nfrom scmrepo.git import Git\nfrom scmrepo.noscm import NoSCM\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\nfrom dvc.progress import Tqdm\n\nif TYPE_CHECKING:\n    from scmrepo.progress import GitProgressEvent\n\n    from dvc.fs import FileSystem\n\nlogger = logger.getChild(__name__)\n\n\nclass SCMError(DvcException):\n    \"\"\"Base class for source control management errors.\"\"\"\n\n\nclass CloneError(SCMError):\n    pass\n\n\nclass RevError(SCMError):\n    pass\n\n\nclass NoSCMError(SCMError):\n    def __init__(self):\n        msg = (\n            \"Only supported for Git repositories. If you're \"\n            \"seeing this error in a Git repo, try updating the DVC \"\n            \"configuration with `dvc config core.no_scm false`.\"\n        )\n        super().__init__(msg)\n\n\nclass InvalidRemoteSCMRepo(SCMError):  # noqa: N818\n    pass\n\n\nclass GitAuthError(SCMError):\n    def __init__(self, reason: str) -> None:\n        doc = \"See https://dvc.org/doc/user-guide/troubleshooting#git-auth\"\n        super().__init__(f\"{reason}\\n{doc}\")\n\n\n@contextmanager\ndef map_scm_exception(with_cause: bool = False) -> Iterator[None]:\n    from scmrepo.exceptions import SCMError as InternalSCMError\n\n    try:\n        yield\n    except InternalSCMError as exc:\n        into = SCMError(str(exc))\n        if with_cause:\n            raise into from exc\n        raise into  # noqa: B904\n\n\n@overload\ndef SCM(\n    root_dir: str,\n    *,\n    search_parent_directories: bool = ...,\n    no_scm: Literal[False] = ...,\n) -> \"Git\": ...\n\n\n@overload\ndef SCM(\n    root_dir: str,\n    
*,\n    search_parent_directories: bool = ...,\n    no_scm: Literal[True],\n) -> \"NoSCM\": ...\n\n\n@overload\ndef SCM(\n    root_dir: str,\n    *,\n    search_parent_directories: bool = ...,\n    no_scm: bool = ...,\n) -> Union[\"Git\", \"NoSCM\"]: ...\n\n\ndef SCM(root_dir, *, search_parent_directories=True, no_scm=False):\n    \"\"\"Returns SCM instance that corresponds to a repo at the specified\n    path.\n\n    Args:\n        root_dir (str): path to a root directory of the repo.\n        search_parent_directories (bool): whether to look for repo root in\n        parent directories.\n        no_scm (bool): return NoSCM if True.\n\n    Returns:\n        dvc.scm.base.Base: SCM instance.\n    \"\"\"\n    with map_scm_exception():\n        if no_scm:\n            return NoSCM(root_dir, _raise_not_implemented_as=NoSCMError)\n        return Git(root_dir, search_parent_directories=search_parent_directories)\n\n\nclass TqdmGit(Tqdm):\n    BAR_FMT = (\n        \"{desc}|{bar}|{postfix[info]}{n_fmt}/{total_fmt} [{elapsed}, {rate_fmt:>11}]\"\n    )\n\n    def __init__(self, *args, **kwargs):\n        kwargs.setdefault(\"unit\", \"obj\")\n        kwargs.setdefault(\"bar_format\", self.BAR_FMT)\n        super().__init__(*args, **kwargs)\n        self._last_phase = None\n\n    def update_git(self, event: \"GitProgressEvent\") -> None:\n        phase, completed, total, message, *_ = event\n        if phase:\n            message = (phase + \" | \" + message) if message else phase\n        if message:\n            self.set_msg(message)\n        force_refresh = (  # force-refresh progress bar when:\n            (total and completed and completed >= total)  # the task completes\n            or total != self.total  # the total changes\n            or phase != self._last_phase  # or, the phase changes\n        )\n        if completed is not None:\n            self.update_to(completed, total)\n        if force_refresh:\n            self.refresh()\n        self._last_phase = 
phase\n\n\ndef clone(url: str, to_path: str, **kwargs):\n    from scmrepo.exceptions import CloneError as InternalCloneError\n\n    from dvc.repo.experiments.utils import fetch_all_exps\n\n    with TqdmGit(desc=f\"Cloning {os.path.basename(url)}\") as pbar:\n        try:\n            git = Git.clone(url, to_path, progress=pbar.update_git, **kwargs)\n            if \"shallow_branch\" not in kwargs:\n                fetch_all_exps(git, url, progress=pbar.update_git)\n            return git\n        except InternalCloneError as exc:\n            raise CloneError(\"SCM error\") from exc\n\n\ndef resolve_rev(scm: Union[\"Git\", \"NoSCM\"], rev: str) -> str:\n    from scmrepo.exceptions import RevError as InternalRevError\n\n    from dvc.repo.experiments.utils import fix_exp_head\n\n    try:\n        return scm.resolve_rev(fix_exp_head(scm, rev))\n    except InternalRevError as exc:\n        assert isinstance(scm, Git)\n        # `scm` will only resolve git branch and tag names,\n        # if rev is not a sha it may be an abbreviated experiment name\n        if not (rev == \"HEAD\" or rev.startswith(\"refs/\")):\n            from dvc.repo.experiments.utils import AmbiguousExpRefInfo, resolve_name\n\n            try:\n                ref_infos = resolve_name(scm, rev).get(rev)\n            except AmbiguousExpRefInfo:\n                raise RevError(f\"ambiguous Git revision '{rev}'\")  # noqa: B904\n            if ref_infos:\n                return scm.get_ref(str(ref_infos))\n\n        raise RevError(str(exc))  # noqa: B904\n\n\ndef _get_n_commits(scm: \"Git\", revs: list[str], num: int) -> list[str]:\n    results = []\n    for rev in revs:\n        if num == 0:\n            continue\n        results.append(rev)\n        n = 1\n        while True:\n            if num == n:\n                break\n            try:\n                head = f\"{rev}~{n}\"\n                results.append(resolve_rev(scm, head))\n            except RevError:\n                break\n            
n += 1\n    return results\n\n\ndef iter_revs(\n    scm: \"Git\",\n    revs: Optional[list[str]] = None,\n    num: int = 1,\n    all_branches: bool = False,\n    all_tags: bool = False,\n    all_commits: bool = False,\n    all_experiments: bool = False,\n    commit_date: Optional[str] = None,\n) -> Mapping[str, list[str]]:\n    from scmrepo.exceptions import SCMError as _SCMError\n\n    from dvc.repo.experiments.utils import exp_commits\n\n    if not any(\n        [\n            revs,\n            all_branches,\n            all_tags,\n            all_commits,\n            all_experiments,\n            commit_date,\n        ]\n    ):\n        return {}\n\n    revs = revs or []\n    results: list[str] = _get_n_commits(scm, revs, num)\n\n    if all_commits:\n        results.extend(scm.list_all_commits())\n    else:\n        if all_branches:\n            results.extend(_get_n_commits(scm, scm.list_branches(), num))\n\n        if all_tags:\n            results.extend(scm.list_tags())\n\n        if commit_date:\n            from datetime import datetime\n\n            commit_datestamp = (\n                datetime.strptime(commit_date, \"%Y-%m-%d\").timestamp()  # noqa: DTZ007\n            )\n\n            def _time_filter(rev):\n                try:\n                    return scm.resolve_commit(rev).commit_time >= commit_datestamp\n                except _SCMError:\n                    return True\n\n            results.extend(filter(_time_filter, scm.list_all_commits()))\n\n    if all_experiments:\n        results.extend(exp_commits(scm))\n\n    rev_resolver = partial(resolve_rev, scm)\n    grouped = group_by(rev_resolver, results)\n    for rev, names in grouped.items():\n        grouped[rev] = list(dict.fromkeys(names))\n    return grouped\n\n\ndef lfs_prefetch(fs: \"FileSystem\", paths: list[str]):\n    from scmrepo.git.lfs import fetch as _lfs_fetch\n\n    from dvc.fs.dvc import DVCFileSystem\n    from dvc.fs.git import GitFileSystem\n\n    if isinstance(fs, 
DVCFileSystem) and isinstance(fs.repo.fs, GitFileSystem):\n        git_fs = fs.repo.fs\n        scm = fs.repo.scm\n        assert isinstance(scm, Git)\n    else:\n        return\n\n    try:\n        if \"filter=lfs\" not in git_fs.open(\".gitattributes\").read():\n            return\n    except OSError:\n        return\n    with TqdmGit(desc=\"Checking for Git-LFS objects\") as pbar:\n        _lfs_fetch(\n            scm,\n            [git_fs.rev],\n            include=[(path if path.startswith(\"/\") else f\"/{path}\") for path in paths],\n            progress=pbar.update_git,\n        )\n\n\ndef add_no_submodules(\n    scm: \"Base\",\n    paths: Union[str, Iterable[str]],\n    **kwargs,\n) -> None:\n    \"\"\"Stage paths to Git, excluding those inside submodules.\"\"\"\n\n    if isinstance(paths, str):\n        paths = [paths]\n\n    submodule_roots = {os.path.join(scm.root_dir, sub) for sub in scm.list_submodules()}\n\n    repo_paths: list[str] = []\n    skipped_paths: list[str] = []\n\n    for p in paths:\n        abs_path = os.path.abspath(p)\n        if any(\n            abs_path == root or abs_path.startswith(root + os.sep)\n            for root in submodule_roots\n        ):\n            skipped_paths.append(p)\n        else:\n            repo_paths.append(p)\n\n    if skipped_paths:\n        msg = \"Skipping staging for path(s) inside submodules: %s\"\n        logger.debug(msg, \", \".join(skipped_paths))\n\n    scm.add(repo_paths, **kwargs)\n"
  },
  {
    "path": "dvc/stage/__init__.py",
    "content": "import os\nimport string\nfrom collections import defaultdict\nfrom collections.abc import Iterable\nfrom contextlib import suppress\nfrom dataclasses import dataclass\nfrom typing import TYPE_CHECKING, Any, Optional, TypeVar, Union\n\nfrom funcy import project\n\nfrom dvc import prompt\nfrom dvc.exceptions import CacheLinkError, CheckoutError, DvcException, MergeError\nfrom dvc.log import logger\nfrom dvc.utils import relpath\nfrom dvc.utils.objects import cached_property\n\nfrom . import params\nfrom .decorators import rwlocked\nfrom .exceptions import StageUpdateError\nfrom .imports import sync_import, update_import\nfrom .run import run_stage\nfrom .utils import (\n    check_circular_dependency,\n    check_duplicated_arguments,\n    check_missing_outputs,\n    check_no_externals,\n    check_stage_path,\n    compute_md5,\n    fill_stage_dependencies,\n    fill_stage_outputs,\n    get_dump,\n)\n\nif TYPE_CHECKING:\n    from dvc.dependency import Dependency, ParamsDependency\n    from dvc.dvcfile import ProjectFile, SingleStageFile\n    from dvc.output import Output\n    from dvc.repo import Repo\n    from dvc.types import StrPath\n    from dvc_data.hashfile.db import HashFileDB\n    from dvc_data.hashfile.hash_info import HashInfo\n    from dvc_objects.db import ObjectDB\n\nlogger = logger.getChild(__name__)\n# Disallow all punctuation characters except hyphen and underscore\nINVALID_STAGENAME_CHARS = set(string.punctuation) - {\"_\", \"-\"}\nEnv = dict[str, str]\nChangedEntries = tuple[list[str], list[str], Optional[str]]\n\n_T = TypeVar(\"_T\")\n\n\ndef loads_from(\n    cls: type[_T], repo: \"Repo\", path: str, wdir: str, data: dict[str, Any]\n) -> _T:\n    kw = {\n        \"repo\": repo,\n        \"path\": path,\n        \"wdir\": wdir,\n        **project(\n            data,\n            [\n                Stage.PARAM_CMD,\n                Stage.PARAM_LOCKED,\n                Stage.PARAM_FROZEN,\n                Stage.PARAM_ALWAYS_CHANGED,\n    
            Stage.PARAM_MD5,\n                Stage.PARAM_DESC,\n                Stage.PARAM_META,\n                \"name\",\n            ],\n        ),\n    }\n    return cls(**kw)\n\n\n@dataclass\nclass RawData:\n    parametrized: bool = False\n    generated_from: Optional[str] = None\n\n\ndef create_stage(cls: type[_T], repo, path, **kwargs) -> _T:\n    from dvc.dvcfile import check_dvcfile_path\n\n    wdir = os.path.abspath(kwargs.get(\"wdir\") or os.curdir)\n    path = os.path.abspath(path)\n\n    check_dvcfile_path(repo, path)\n    check_stage_path(repo, wdir, is_wdir=kwargs.get(\"wdir\"))\n    check_stage_path(repo, os.path.dirname(path))\n\n    stage = loads_from(cls, repo, path, wdir, kwargs)\n    fill_stage_outputs(stage, **kwargs)\n    check_no_externals(stage)\n    fill_stage_dependencies(\n        stage, **project(kwargs, [\"deps\", \"erepo\", \"params\", \"fs_config\", \"db\"])\n    )\n    check_circular_dependency(stage)\n    check_duplicated_arguments(stage)\n\n    return stage\n\n\ndef restore_fields(stage: \"Stage\") -> None:\n    from .exceptions import StageNotFound\n\n    if not stage.dvcfile.exists():\n        return\n\n    try:\n        old = stage.reload()\n    except StageNotFound:\n        return\n\n    # will be used to restore comments later\n\n    stage._stage_text = old._stage_text\n    stage.meta = old.meta\n    stage.desc = old.desc\n\n    old_outs = {out.def_path: out for out in old.outs}\n    for out in stage.outs:\n        old_out = old_outs.get(out.def_path)\n        if old_out is not None:\n            out.restore_fields(old_out)\n\n\nclass Stage(params.StageParams):\n    def __init__(  # noqa: PLR0913\n        self,\n        repo,\n        path=None,\n        cmd=None,\n        wdir=os.curdir,\n        deps=None,\n        outs=None,\n        md5=None,\n        locked=False,  # backward compatibility\n        frozen=False,\n        always_changed=False,\n        stage_text=None,\n        dvcfile=None,\n        desc: 
Optional[str] = None,\n        meta=None,\n    ):\n        if deps is None:\n            deps = []\n        if outs is None:\n            outs = []\n\n        self.repo = repo\n        self._path = path\n        self.cmd = cmd\n        self.wdir = wdir\n        self.outs: list[Output] = outs\n        self.deps: list[Dependency] = deps\n        self.md5 = md5\n        self.frozen = locked or frozen\n        self.always_changed = always_changed\n        self._stage_text = stage_text\n        self._dvcfile = dvcfile\n        self.desc: Optional[str] = desc\n        self.meta = meta\n        self.raw_data = RawData()\n\n    @property\n    def path(self) -> str:\n        return self._path\n\n    @path.setter\n    def path(self, path: str):\n        self._path = path\n        self.__dict__.pop(\"path_in_repo\", None)\n        self.__dict__.pop(\"relpath\", None)\n\n    @property\n    def dvcfile(self) -> Union[\"ProjectFile\", \"SingleStageFile\"]:\n        if self.path and self._dvcfile and self.path == self._dvcfile.path:\n            return self._dvcfile\n\n        if not self.path:\n            raise DvcException(\n                \"Stage does not have any path set and is detached from dvcfile.\"\n            )\n\n        from dvc.dvcfile import load_file\n\n        self._dvcfile = load_file(self.repo, self.path)\n        return self._dvcfile\n\n    @dvcfile.setter\n    def dvcfile(self, dvcfile: Union[\"ProjectFile\", \"SingleStageFile\"]) -> None:\n        self._dvcfile = dvcfile\n\n    @property\n    def params(self) -> list[\"ParamsDependency\"]:\n        from dvc.dependency import ParamsDependency\n\n        return [dep for dep in self.deps if isinstance(dep, ParamsDependency)]\n\n    @property\n    def metrics(self) -> list[\"Output\"]:\n        return [out for out in self.outs if out.metric]\n\n    def __repr__(self):\n        return f\"Stage: '{self.addressing}'\"\n\n    def __str__(self):\n        return f\"stage: '{self.addressing}'\"\n\n    @property\n    
def addressing(self) -> str:\n        \"\"\"\n        Useful for alternative presentations where we don't need\n        `Stage:` prefix.\n        \"\"\"\n        return self.relpath if self.path else \"No path\"\n\n    def __hash__(self):\n        return hash(self.path_in_repo)\n\n    def __eq__(self, other):\n        return (\n            self.__class__ == other.__class__\n            and self.repo is other.repo\n            and self.path_in_repo == other.path_in_repo\n        )\n\n    @cached_property\n    def path_in_repo(self) -> str:\n        return relpath(self.path, self.repo.root_dir)\n\n    @cached_property\n    def relpath(self) -> str:\n        return relpath(self.path)\n\n    @property\n    def is_data_source(self) -> bool:\n        \"\"\"Whether the DVC file was created with `dvc add` or `dvc import`\"\"\"\n        return self.cmd is None\n\n    @property\n    def is_callback(self) -> bool:\n        \"\"\"\n        A callback stage is always considered as changed,\n        so it runs on every `dvc repro` call.\n        \"\"\"\n        return self.cmd and not any((self.deps, self.outs))\n\n    @property\n    def is_import(self) -> bool:\n        \"\"\"Whether the DVC file was created with `dvc import`.\"\"\"\n        return not self.cmd and len(self.deps) == 1 and len(self.outs) == 1\n\n    @property\n    def is_partial_import(self) -> bool:\n        \"\"\"\n        Whether the DVC file was created using `dvc import --no-download`\n        or `dvc import-url --no-download`.\n        \"\"\"\n        return self.is_import and (not self.outs[0].hash_info)\n\n    @property\n    def is_repo_import(self) -> bool:\n        if not self.is_import:\n            return False\n\n        from dvc.dependency import RepoDependency\n\n        return isinstance(self.deps[0], RepoDependency)\n\n    @property\n    def is_db_import(self) -> bool:\n        if not self.is_import:\n            return False\n\n        from dvc.dependency import DbDependency\n\n        return 
isinstance(self.deps[0], DbDependency)\n\n    @property\n    def is_versioned_import(self) -> bool:\n        from dvc.dependency import DbDependency\n\n        return (\n            self.is_import\n            and not isinstance(self.deps[0], DbDependency)\n            and self.deps[0].fs.version_aware\n        )\n\n    def short_description(self) -> Optional[\"str\"]:\n        desc: Optional[str] = None\n        if self.desc:\n            with suppress(ValueError):\n                # try to use first non-empty line as a description\n                line = next(filter(None, self.desc.splitlines()))\n                return line.strip()\n        return desc\n\n    def changed_deps(\n        self, allow_missing: bool = False, upstream: Optional[list] = None\n    ) -> bool:\n        if self.frozen:\n            return False\n\n        if self.is_callback or self.always_changed:\n            return True\n\n        return self._changed_deps(allow_missing=allow_missing, upstream=upstream)\n\n    @rwlocked(read=[\"deps\"])\n    def _changed_deps(\n        self, allow_missing: bool = False, upstream: Optional[list] = None\n    ) -> bool:\n        for dep in self.deps:\n            status = dep.status()\n            if status:\n                if allow_missing and status[str(dep)] == \"deleted\":\n                    if upstream and any(\n                        dep.fs_path == out.fs_path and dep.hash_info != out.hash_info\n                        for stage in upstream\n                        for out in stage.outs\n                    ):\n                        status[str(dep)] = \"modified\"\n                    else:\n                        continue\n                logger.debug(\n                    \"Dependency '%s' of %s changed because it is '%s'.\",\n                    dep,\n                    self,\n                    status[str(dep)],\n                )\n                return True\n        return False\n\n    @rwlocked(read=[\"outs\"])\n    def 
changed_outs(self, allow_missing: bool = False) -> bool:\n        for out in self.outs:\n            status = out.status()\n            if status:\n                if allow_missing and status[str(out)] in [\"not in cache\", \"deleted\"]:\n                    continue\n                logger.debug(\n                    \"Output '%s' of %s changed because it is '%s'.\",\n                    out,\n                    self,\n                    status[str(out)],\n                )\n                return True\n\n        return False\n\n    def changed_stage(self) -> bool:\n        changed = self.md5 != self.compute_md5()\n        if changed:\n            logger.debug(self._changed_stage_entry())\n        return changed\n\n    @rwlocked(read=[\"deps\", \"outs\"])\n    def changed(\n        self, allow_missing: bool = False, upstream: Optional[list] = None\n    ) -> bool:\n        is_changed = (\n            # Short-circuit order: stage md5 is fast,\n            # deps are expected to change\n            self.changed_stage()\n            or self.changed_deps(allow_missing=allow_missing, upstream=upstream)\n            or self.changed_outs(allow_missing=allow_missing)\n        )\n        if is_changed:\n            logger.debug(\"%s changed.\", self)\n        return is_changed\n\n    @rwlocked(write=[\"outs\"])\n    def remove_outs(self, ignore_remove=False, force=False) -> None:\n        \"\"\"Used mainly for `dvc remove --outs` and :func:`Stage.reproduce`.\"\"\"\n        for out in self.outs:\n            if out.persist and not force:\n                out.unprotect()\n                continue\n\n            logger.debug(\"Removing output '%s' of %s.\", out, self)\n            out.remove(ignore_remove=ignore_remove)\n\n    def unprotect_outs(self) -> None:\n        for out in self.outs:\n            out.unprotect()\n\n    def ignore_remove_outs(self) -> None:\n        for out in self.outs:\n            out.ignore_remove()\n\n    @rwlocked(write=[\"outs\"])\n    def 
remove(self, force=False, remove_outs=True, purge=True) -> None:\n        if remove_outs:\n            self.remove_outs(ignore_remove=True, force=force)\n        else:\n            self.unprotect_outs()\n            self.ignore_remove_outs()\n        if purge:\n            self.dvcfile.remove_stage(self)\n\n    def transfer(\n        self,\n        source: str,\n        odb: Optional[\"ObjectDB\"] = None,\n        to_remote: bool = False,\n        jobs: Optional[int] = None,\n        force: bool = False,\n    ) -> None:\n        assert len(self.outs) == 1\n        (out,) = self.outs\n        out.transfer(source, odb=odb, jobs=jobs)\n        if not to_remote:\n            out.checkout(force=force)\n            out.ignore()\n\n    @rwlocked(read=[\"deps\"], write=[\"outs\"])\n    def reproduce(self, interactive=False, **kwargs) -> Optional[\"Stage\"]:\n        force = kwargs.get(\"force\", False)\n        allow_missing = kwargs.get(\"allow_missing\", False)\n        pull = kwargs.get(\"pull\", False)\n        upstream = kwargs.pop(\"upstream\", None)\n        if force:\n            pass\n        # Skip stages with missing data if otherwise unchanged\n        elif not self.changed(allow_missing, upstream):\n            if not isinstance(self, PipelineStage) and self.is_data_source:\n                logger.info(\"'%s' didn't change, skipping\", self.addressing)\n            else:\n                logger.info(\"Stage '%s' didn't change, skipping\", self.addressing)\n            return None\n        # Pull stages with missing data if otherwise unchanged\n        elif not self.changed(True, upstream) and pull:\n            try:\n                logger.info(\"Pulling data for %s\", self)\n                self.repo.pull(self.addressing, jobs=kwargs.get(\"jobs\"))\n                self.checkout()\n                return None\n            except CheckoutError:\n                logger.info(\"Unable to pull data for %s\", self)\n\n        msg = f\"Going to reproduce {self}. 
Are you sure you want to continue?\"\n        if interactive and not prompt.confirm(msg):\n            raise DvcException(\"reproduction aborted by the user\")\n\n        self.run(**kwargs)\n\n        logger.debug(\"%s was reproduced\", self)\n\n        return self\n\n    def update(\n        self,\n        rev=None,\n        to_remote=False,\n        remote=None,\n        no_download=None,\n        jobs=None,\n    ) -> None:\n        if not (self.is_repo_import or self.is_import):\n            raise StageUpdateError(self.relpath)\n\n        # always force update DbDep since we don't know if it's changed\n        force = self.is_db_import\n        update_import(\n            self,\n            rev=rev,\n            to_remote=to_remote,\n            remote=remote,\n            no_download=no_download,\n            jobs=jobs,\n            force=force,\n        )\n\n    def reload(self) -> \"Stage\":\n        return self.dvcfile.stage\n\n    def dumpd(self, **kwargs) -> dict[str, Any]:\n        return get_dump(self, **kwargs)\n\n    def compute_md5(self) -> Optional[str]:\n        # `dvc add`ed files don't need stage md5\n        if self.is_data_source and not (self.is_import or self.is_repo_import):\n            m = None\n        else:\n            m = compute_md5(self)\n        logger.debug(\"Computed %s md5: '%s'\", self, m)\n        return m\n\n    def save(self, allow_missing: bool = False, run_cache: bool = True):\n        self.save_deps(allow_missing=allow_missing)\n\n        self.save_outs(allow_missing=allow_missing)\n\n        self.md5 = self.compute_md5()\n\n        if run_cache:\n            self.repo.stage_cache.save(self)\n\n    def save_deps(self, allow_missing=False):\n        from dvc.dependency.base import DependencyDoesNotExistError\n\n        for dep in self.deps:\n            try:\n                dep.save()\n            except DependencyDoesNotExistError:\n                if not allow_missing:\n                    raise\n\n    def save_outs(self, 
allow_missing: bool = False):\n        from dvc.output import OutputDoesNotExistError\n\n        for out in self.outs:\n            # old state just before saving so to merge them later\n            old_state = out._get_versioned_meta()\n            try:\n                out.save()\n            except OutputDoesNotExistError:\n                if not allow_missing:\n                    raise\n\n            if old_state:\n                out.merge_version_meta(*old_state)\n\n    def ignore_outs(self) -> None:\n        for out in self.outs:\n            out.ignore()\n\n    @staticmethod\n    def _changed_entries(entries) -> list[str]:\n        return [str(entry) for entry in entries if entry.workspace_status()]\n\n    def _changed_stage_entry(self) -> str:\n        return f\"'md5' of {self} changed.\"\n\n    def changed_entries(self) -> ChangedEntries:\n        changed_deps = self._changed_entries(self.deps)\n        changed_outs = self._changed_entries(self.outs)\n        return (\n            changed_deps,\n            changed_outs,\n            self._changed_stage_entry() if self.changed_stage() else None,\n        )\n\n    @rwlocked(write=[\"outs\"])\n    def commit(self, allow_missing=False, filter_info=None, **kwargs) -> None:\n        from dvc.output import OutputDoesNotExistError\n\n        link_failures = []\n        for out in self.filter_outs(filter_info):\n            try:\n                out.commit(filter_info=filter_info, **kwargs)\n            except OutputDoesNotExistError:\n                if not allow_missing:\n                    raise\n            except CacheLinkError:\n                link_failures.append(out.fs_path)\n        if link_failures:\n            raise CacheLinkError(link_failures)\n\n    @rwlocked(write=[\"outs\"])\n    def add_outs(self, filter_info=None, allow_missing: bool = False, **kwargs):\n        from dvc.output import OutputDoesNotExistError\n\n        link_failures = []\n        for out in self.filter_outs(filter_info):\n   
         # old state just before saving so to merge them later\n            old_state = out._get_versioned_meta()\n            try:\n                out.add(filter_info, **kwargs)\n            except (FileNotFoundError, OutputDoesNotExistError):\n                if not allow_missing:\n                    raise\n            except CacheLinkError:\n                link_failures.append(filter_info or out.fs_path)\n\n            if old_state:\n                out.merge_version_meta(*old_state)\n\n        if link_failures:\n            raise CacheLinkError(link_failures)\n\n    @rwlocked(read=[\"deps\", \"outs\"])\n    def run(\n        self,\n        dry=False,\n        no_commit=False,\n        force=False,\n        allow_missing=False,\n        no_download=False,\n        **kwargs,\n    ) -> None:\n        if (self.cmd or self.is_import) and not self.frozen and not dry:\n            self.remove_outs(ignore_remove=False, force=False)\n\n        if (self.is_import and not self.frozen) or self.is_partial_import:\n            self._sync_import(dry, force, kwargs.get(\"jobs\"), no_download)\n        elif not self.frozen and self.cmd:\n            self._run_stage(dry, force, **kwargs)\n        elif not dry:\n            args = (\"outputs\", \"frozen \") if self.frozen else (\"data sources\", \"\")\n            logger.info(\"Verifying %s in %s%s\", *args, self)\n            self._check_missing_outputs()\n\n        if not dry:\n            if no_download:\n                allow_missing = True\n\n            no_cache_outs = any(\n                not out.use_cache\n                for out in self.outs\n                if not (out.is_metric or out.is_plot)\n            )\n            self.save(\n                allow_missing=allow_missing,\n                run_cache=not no_commit and not no_cache_outs,\n            )\n\n            if no_download:\n                self.ignore_outs()\n            if not no_commit:\n                self.commit(allow_missing=allow_missing)\n\n    
@rwlocked(read=[\"deps\"], write=[\"outs\"])\n    def _run_stage(self, dry, force, **kwargs) -> None:\n        return run_stage(self, dry, force, **kwargs)\n\n    @rwlocked(read=[\"deps\"], write=[\"outs\"])\n    def _sync_import(self, dry, force, jobs, no_download) -> None:\n        sync_import(self, dry, force, jobs, no_download)\n\n    @rwlocked(read=[\"outs\"])\n    def _check_missing_outputs(self) -> None:\n        check_missing_outputs(self)\n\n    def filter_outs(self, fs_path) -> Iterable[\"Output\"]:\n        def _func(o):\n            return o.fs.isin_or_eq(fs_path, o.fs_path)\n\n        return filter(_func, self.outs) if fs_path else self.outs\n\n    @rwlocked(write=[\"outs\"])\n    def checkout(\n        self, allow_missing: bool = False, **kwargs\n    ) -> dict[str, list[\"StrPath\"]]:\n        stats: dict[str, list[StrPath]] = defaultdict(list)\n        if self.is_partial_import:\n            return stats\n\n        for out in self.filter_outs(kwargs.get(\"filter_info\")):\n            key, outs = self._checkout(out, allow_missing=allow_missing, **kwargs)\n            if key:\n                stats[key].extend(outs)\n        return stats\n\n    @staticmethod\n    def _checkout(out, **kwargs) -> tuple[Optional[str], list[str]]:\n        try:\n            result = out.checkout(**kwargs)\n            added, modified = result or (None, None)\n            if not (added or modified):\n                return None, []\n            return \"modified\" if modified else \"added\", [str(out)]\n        except CheckoutError as exc:\n            return \"failed\", exc.target_infos\n\n    @rwlocked(read=[\"deps\", \"outs\"])\n    def status(\n        self, check_updates: bool = False, filter_info: Optional[bool] = None\n    ) -> dict[str, list[Union[str, dict[str, str]]]]:\n        ret: list[Union[str, dict[str, str]]] = []\n        show_import = (\n            self.is_repo_import or self.is_versioned_import\n        ) and check_updates\n\n        if not self.frozen 
or show_import:\n            self._status_deps(ret)\n        self._status_outs(ret, filter_info=filter_info)\n        self._status_always_changed(ret)\n        self._status_stage(ret)\n        return {self.addressing: ret} if ret else {}\n\n    @staticmethod\n    def _status(entries: Iterable[\"Output\"]) -> dict[str, str]:\n        ret = {}\n\n        for entry in entries:\n            ret.update(entry.status())\n\n        return ret\n\n    def _status_deps(self, ret) -> None:\n        deps_status = self._status(self.deps)\n        if deps_status:\n            ret.append({\"changed deps\": deps_status})\n\n    def _status_outs(self, ret, filter_info) -> None:\n        filter_outs = self.filter_outs(filter_info)\n        outs_status = self._status(filter_outs)\n        if outs_status:\n            ret.append({\"changed outs\": outs_status})\n\n    def _status_always_changed(self, ret) -> None:\n        if self.is_callback or self.always_changed:\n            ret.append(\"always changed\")\n\n    def _status_stage(self, ret) -> None:\n        if self.changed_stage():\n            ret.append(\"changed checksum\")\n\n    def already_cached(self) -> bool:\n        return not self.changed_stage() and self.deps_cached() and self.outs_cached()\n\n    def deps_cached(self) -> bool:\n        return all(not dep.changed() for dep in self.deps)\n\n    def outs_cached(self) -> bool:\n        return all(\n            not out.changed_cache() if out.use_cache else not out.changed()\n            for out in self.outs\n        )\n\n    def get_used_objs(\n        self, *args, **kwargs\n    ) -> dict[Optional[\"HashFileDB\"], set[\"HashInfo\"]]:\n        \"\"\"Return set of object IDs used by this stage.\"\"\"\n        if self.is_partial_import and not self.is_repo_import:\n            return {}\n\n        used_objs = defaultdict(set)\n        for out in self.filter_outs(kwargs.get(\"filter_info\")):\n            for odb, objs in out.get_used_objs(*args, **kwargs).items():\n           
     used_objs[odb].update(objs)\n        return used_objs\n\n    @staticmethod\n    def _check_can_merge(stage, ancestor_out=None) -> None:\n        if isinstance(stage, PipelineStage):\n            raise MergeError(\"unable to auto-merge pipeline stages\")\n\n        if not stage.is_data_source or stage.deps or len(stage.outs) > 1:\n            raise MergeError(\n                \"unable to auto-merge DVC files that weren't created by `dvc add`\"\n            )\n\n        if ancestor_out and not stage.outs:\n            raise MergeError(\"unable to auto-merge DVC files with deleted outputs\")\n\n    def merge(self, ancestor, other, allowed=None) -> None:\n        assert other\n\n        if not other.outs:\n            return\n\n        if not self.outs:\n            self.outs = other.outs\n            return\n\n        if ancestor:\n            self._check_can_merge(ancestor)\n            outs = ancestor.outs\n            ancestor_out = outs[0] if outs else None\n        else:\n            ancestor_out = None\n\n        self._check_can_merge(self, ancestor_out)\n        self._check_can_merge(other, ancestor_out)\n\n        self.outs[0].merge(ancestor_out, other.outs[0], allowed=allowed)\n\n    def dump(self, **kwargs) -> None:\n        self.dvcfile.dump(self, **kwargs)\n\n\nclass PipelineStage(Stage):\n    def __init__(self, *args, name: Optional[str] = None, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.name = name\n        self.cmd_changed = False\n        self.tracked_vars: dict[str, dict[str, dict[str, str]]] = {}\n\n    def __eq__(self, other):\n        return super().__eq__(other) and self.name == other.name\n\n    def __hash__(self) -> int:\n        return hash((self.path_in_repo, self.name))\n\n    @property\n    def addressing(self):\n        from dvc.dvcfile import PROJECT_FILE\n\n        if self.path and self.relpath == PROJECT_FILE:\n            return self.name\n        return f\"{super().addressing}:{self.name}\"\n\n    def 
reload(self) -> Stage:\n        from dvc.dvcfile import ProjectFile\n\n        assert isinstance(self.dvcfile, ProjectFile)\n\n        self.dvcfile._reset()\n        return self.dvcfile.stages[self.name]\n\n    def _status_stage(self, ret) -> None:\n        if self.cmd_changed:\n            ret.append(\"changed command\")\n\n    def changed_stage(self) -> bool:\n        if self.cmd_changed:\n            logger.debug(self._changed_stage_entry())\n        return self.cmd_changed\n\n    def _changed_stage_entry(self) -> str:\n        return f\"'cmd' of {self} has changed.\"\n\n    def merge(self, ancestor, other, allowed=None):\n        raise NotImplementedError\n"
  },
  {
    "path": "dvc/stage/cache.py",
    "content": "import os\nfrom contextlib import contextmanager\nfrom typing import TYPE_CHECKING, Optional\n\nfrom funcy import first\n\nfrom dvc import fs\nfrom dvc.config import RemoteConfigError\nfrom dvc.exceptions import CollectCacheError, DvcException\nfrom dvc.log import logger\nfrom dvc.utils import dict_sha256, relpath\n\nif TYPE_CHECKING:\n    from dvc_objects.db import ObjectDB\n\nlogger = logger.getChild(__name__)\n\n\nclass RunCacheNotFoundError(DvcException):\n    def __init__(self, stage):\n        super().__init__(f\"No run-cache for {stage.addressing}\")\n\n\nclass RunCacheNotSupported(DvcException):\n    pass\n\n\ndef _get_cache_hash(cache, key=False):\n    from dvc_data.hashfile.meta import Meta\n\n    if key:\n        cache[\"outs\"] = [out[\"path\"] for out in cache.get(\"outs\", [])]\n    return dict_sha256(cache, exclude=[Meta.PARAM_SIZE, Meta.PARAM_NFILES])\n\n\ndef _can_hash(stage):\n    if stage.is_callback or stage.always_changed:\n        return False\n\n    if not all([stage.cmd, stage.deps, stage.outs]):\n        return False\n\n    for dep in stage.deps:\n        if not (dep.protocol == \"local\" and dep.def_path and dep.get_hash()):\n            return False\n\n    for out in stage.outs:\n        if (\n            out.protocol != \"local\"\n            or not out.def_path\n            or out.persist\n            or not out.is_in_repo\n        ):\n            return False\n\n    return True\n\n\ndef _get_stage_hash(stage):\n    from .serialize import to_single_stage_lockfile\n\n    assert _can_hash(stage)\n    return _get_cache_hash(to_single_stage_lockfile(stage), key=True)\n\n\nclass StageCache:\n    def __init__(self, repo):\n        self.repo = repo\n        self.cache_dir = os.path.join(self.repo.cache.legacy.path, \"runs\")\n\n    def _get_cache_dir(self, key):\n        return os.path.join(self.cache_dir, key[:2], key)\n\n    def _get_cache_path(self, key, value):\n        return os.path.join(self._get_cache_dir(key), 
value)\n\n    def _load_cache(self, key, value):\n        from voluptuous import Invalid\n\n        from dvc.schema import COMPILED_LOCK_FILE_STAGE_SCHEMA\n        from dvc.utils.serialize import YAMLFileCorruptedError, load_yaml\n\n        path = self._get_cache_path(key, value)\n\n        try:\n            return COMPILED_LOCK_FILE_STAGE_SCHEMA(load_yaml(path))\n        except FileNotFoundError:\n            return None\n        except (YAMLFileCorruptedError, Invalid):\n            logger.warning(\"corrupted cache file '%s'.\", relpath(path))\n            os.unlink(path)\n            return None\n\n    def _load(self, stage):\n        key = _get_stage_hash(stage)\n        if not key:\n            return None\n\n        cache_dir = self._get_cache_dir(key)\n        if not os.path.exists(cache_dir):\n            return None\n\n        newest_entry = first(\n            sorted(\n                os.listdir(cache_dir),\n                key=lambda f: os.path.getmtime(os.path.join(cache_dir, f)),\n                reverse=True,\n            )\n        )\n        cache = self._load_cache(key, newest_entry)\n        if cache:\n            return cache\n\n        return None\n\n    def _create_stage(self, cache, wdir=None):\n        from . 
import PipelineStage, create_stage\n        from .loader import StageLoader\n\n        stage = create_stage(\n            PipelineStage,\n            repo=self.repo,\n            path=\"dvc.yaml\",\n            cmd=cache[\"cmd\"],\n            wdir=wdir,\n            outs=[out[\"path\"] for out in cache[\"outs\"]],\n        )\n        StageLoader.fill_from_lock(stage, cache)\n        return stage\n\n    @contextmanager\n    def _cache_type_copy(self):\n        cache_types = self.repo.cache.local.cache_types\n        legacy_cache_types = self.repo.cache.legacy.cache_types\n        self.repo.cache.local.cache_types = [\"copy\"]\n        self.repo.cache.legacy.cache_types = [\"copy\"]\n        try:\n            yield\n        finally:\n            self.repo.cache.local.cache_types = cache_types\n            self.repo.cache.legacy.cache_types = legacy_cache_types\n\n    def _uncached_outs(self, stage, cache):\n        # NOTE: using temporary stage to avoid accidentally modifying original\n        # stage and to workaround `commit/checkout` not working for uncached\n        # outputs.\n        cached_stage = self._create_stage(cache, wdir=stage.wdir)\n\n        outs_no_cache = [out.def_path for out in stage.outs if not out.use_cache]\n\n        # NOTE: using copy link to make it look like a git-tracked file\n        with self._cache_type_copy():\n            for out in cached_stage.outs:\n                if out.def_path in outs_no_cache and out.is_in_repo:\n                    yield out\n\n    def save(self, stage):\n        from .serialize import to_single_stage_lockfile\n\n        if not _can_hash(stage):\n            return\n\n        cache_key = _get_stage_hash(stage)\n        cache = to_single_stage_lockfile(stage)\n        cache_value = _get_cache_hash(cache)\n\n        existing_cache = self._load_cache(cache_key, cache_value)\n        cache = existing_cache or cache\n\n        for out in self._uncached_outs(stage, cache):\n            out.commit()\n\n        if 
existing_cache:\n            return\n\n        from dvc.schema import COMPILED_LOCK_FILE_STAGE_SCHEMA\n        from dvc.utils.serialize import dump_yaml\n\n        # sanity check\n        COMPILED_LOCK_FILE_STAGE_SCHEMA(cache)\n\n        path = self._get_cache_path(cache_key, cache_value)\n        local_fs = self.repo.cache.legacy.fs\n        parent = local_fs.parent(path)\n        self.repo.cache.legacy.makedirs(parent)\n        tmp = local_fs.join(parent, fs.utils.tmp_fname())\n        assert os.path.exists(parent)\n        assert os.path.isdir(parent)\n        dump_yaml(tmp, cache)\n        self.repo.cache.legacy.move(tmp, path)\n\n    def restore(self, stage, run_cache=True, pull=False, dry=False):  # noqa: C901\n        from .serialize import to_single_stage_lockfile\n\n        if not _can_hash(stage):\n            raise RunCacheNotFoundError(stage)\n\n        if (\n            not stage.changed_stage()\n            and stage.deps_cached()\n            and all(bool(out.hash_info) for out in stage.outs)\n        ):\n            cache = to_single_stage_lockfile(stage)\n        else:\n            if not run_cache:  # backward compatibility\n                raise RunCacheNotFoundError(stage)\n            if not dry:\n                stage.save_deps()\n            cache = self._load(stage)\n            if not cache:\n                raise RunCacheNotFoundError(stage)\n\n        cached_stage = self._create_stage(cache, wdir=stage.wdir)\n\n        if pull and not dry:\n            try:\n                for objs in cached_stage.get_used_objs().values():\n                    self.repo.cloud.pull(objs)\n            except CollectCacheError as exc:\n                raise RunCacheNotFoundError(stage) from exc\n\n        if not cached_stage.outs_cached():\n            raise RunCacheNotFoundError(stage)\n\n        logger.info(\n            \"Stage '%s' is cached - skipping run, checking out outputs\",\n            stage.addressing,\n        )\n        if not dry:\n          
  cached_stage.checkout()\n\n    def transfer(self, from_odb, to_odb, force=True):\n        from dvc.fs import HTTPFileSystem, LocalFileSystem\n        from dvc.fs.callbacks import TqdmCallback\n\n        from_fs = from_odb.fs\n        to_fs = to_odb.fs\n        func = fs.generic.log_exceptions(fs.generic.copy)\n        runs = from_fs.join(from_odb.path, \"runs\")\n\n        http_odb = next(\n            (odb for odb in (from_odb, to_odb) if isinstance(odb.fs, HTTPFileSystem)),\n            None,\n        )\n        if http_odb:\n            path = http_odb.path\n            message = f\"run-cache is not supported for http filesystem: {path}\"\n            raise RunCacheNotSupported(message)\n\n        ret: list[tuple[str, str]] = []\n        if not from_fs.exists(runs):\n            return ret\n\n        for src in from_fs.find(runs):\n            rel = from_fs.relpath(src, from_odb.path)\n            if not isinstance(to_fs, LocalFileSystem):\n                rel = from_fs.as_posix(rel)\n\n            dst = to_fs.join(to_odb.path, rel)\n            key = to_fs.parent(dst)\n\n            # check if any build cache already exists for this key\n            # TODO: check if MaxKeys=1 or something like that applies\n            # or otherwise this will take a lot of time!\n            if not force and to_fs.exists(key) and first(to_fs.find(key)):\n                continue\n\n            src_name = from_fs.name(src)\n            parent_name = from_fs.name(from_fs.parent(src))\n            with TqdmCallback(desc=src_name, bytes=True) as cb:\n                func(from_fs, src, to_fs, dst, callback=cb)\n            ret.append((parent_name, src_name))\n        return ret\n\n    def push(self, remote: Optional[str], odb: Optional[\"ObjectDB\"] = None):\n        try:\n            dest_odb = odb or self.repo.cloud.get_remote_odb(\n                remote, \"push --run-cache\", hash_name=\"md5-dos2unix\"\n            )\n        except RemoteConfigError as e:\n            raise 
RunCacheNotSupported(e) from e\n        return self.transfer(self.repo.cache.legacy, dest_odb)\n\n    def pull(self, remote: Optional[str], odb: Optional[\"ObjectDB\"] = None):\n        try:\n            odb = odb or self.repo.cloud.get_remote_odb(\n                remote, \"fetch --run-cache\", hash_name=\"md5-dos2unix\"\n            )\n        except RemoteConfigError as e:\n            raise RunCacheNotSupported(e) from e\n        return self.transfer(odb, self.repo.cache.legacy)\n\n    def get_used_objs(self, used_run_cache, *args, **kwargs):\n        \"\"\"Return used cache for the specified run-cached stages.\"\"\"\n        from collections import defaultdict\n\n        used_objs = defaultdict(set)\n        for key, value in used_run_cache:\n            entry = self._load_cache(key, value)\n            if not entry:\n                continue\n            stage = self._create_stage(entry)\n            for odb, objs in stage.get_used_objs(*args, **kwargs).items():\n                used_objs[odb].update(objs)\n        return used_objs\n"
  },
  {
    "path": "dvc/stage/decorators.py",
    "content": "from functools import wraps\n\nfrom funcy import decorator\n\n\n@decorator\ndef rwlocked(call, read=None, write=None):\n    import sys\n\n    from dvc.dependency.db import AbstractDependency\n    from dvc.dependency.repo import RepoDependency\n    from dvc.rwlock import rwlock\n\n    if read is None:\n        read = []\n\n    if write is None:\n        write = []\n\n    stage = call._args[0]\n\n    assert stage.repo.lock.is_locked\n\n    def _chain(names):\n        return [\n            item.fs_path\n            for attr in names\n            for item in getattr(stage, attr)\n            # There is no need to lock RepoDependency deps, as there is no\n            # corresponding OutputREPO, so we can't even write it.\n            if not isinstance(item, (RepoDependency, AbstractDependency))\n        ]\n\n    cmd = \" \".join(sys.argv)\n\n    with rwlock(\n        stage.repo.tmp_dir,\n        stage.repo.fs,\n        cmd,\n        _chain(read),\n        _chain(write),\n        stage.repo.config[\"core\"].get(\"hardlink_lock\", False),\n    ):\n        return call()\n\n\ndef unlocked_repo(f):\n    @wraps(f)\n    def wrapper(stage, *args, **kwargs):\n        stage.repo.lock.unlock()\n        stage.repo._reset()\n        try:\n            ret = f(stage, *args, **kwargs)\n        finally:\n            stage.repo.lock.lock()\n        return ret\n\n    return wrapper\n\n\ndef relock_repo(f):\n    @wraps(f)\n    def wrapper(stage, *args, **kwargs):\n        stage.repo.lock.lock()\n        try:\n            ret = f(stage, *args, **kwargs)\n        finally:\n            stage.repo.lock.unlock()\n            stage.repo._reset()\n        return ret\n\n    return wrapper\n"
  },
  {
    "path": "dvc/stage/exceptions.py",
    "content": "from dvc.exceptions import DvcException\n\n\nclass StageCmdFailedError(DvcException):\n    def __init__(self, cmd, status=None):\n        msg = f\"failed to run: {cmd}\"\n        if status is not None:\n            msg += f\", exited with {status}\"\n        super().__init__(msg)\n\n\nclass StageFileDoesNotExistError(DvcException):\n    DVC_IGNORED = \"is dvc-ignored\"\n    DOES_NOT_EXIST = \"does not exist\"\n\n    def __init__(self, fname, dvc_ignored=False):\n        self.file = fname\n        message = self.DVC_IGNORED if dvc_ignored else self.DOES_NOT_EXIST\n        super().__init__(f\"'{self.file}' {message}\")\n\n\nclass StageFileAlreadyExistsError(DvcException):\n    pass\n\n\nclass StageFileIsNotDvcFileError(DvcException):\n    def __init__(self, fname):\n        from dvc.dvcfile import DVC_FILE_SUFFIX, is_dvc_file\n\n        msg = f\"'{fname}' is not a .dvc file\"\n\n        sname = fname + DVC_FILE_SUFFIX\n        if is_dvc_file(sname):\n            msg += f\". 
Do you mean '{sname}'?\"\n\n        super().__init__(msg)\n\n\nclass StageFileBadNameError(DvcException):\n    pass\n\n\nclass StagePathOutsideError(DvcException):\n    pass\n\n\nclass StagePathNotFoundError(DvcException):\n    pass\n\n\nclass StagePathNotDirectoryError(DvcException):\n    pass\n\n\nclass StageCommitError(DvcException):\n    pass\n\n\nclass StageExternalOutputsError(DvcException):\n    pass\n\n\nclass StageUpdateError(DvcException):\n    def __init__(self, path):\n        super().__init__(f\"update is not supported for '{path}' that is not an import.\")\n\n\nclass MissingDataSource(DvcException):\n    def __init__(self, missing_files):\n        assert len(missing_files) > 0\n\n        source = \"source\"\n        if len(missing_files) > 1:\n            source += \"s\"\n\n        msg = \"missing data '{}': {}\".format(source, \", \".join(missing_files))\n        super().__init__(msg)\n\n\nclass DataSourceChanged(DvcException):\n    def __init__(self, path: str):\n        super().__init__(f\"data source changed: {path}\")\n\n\nclass StageNotFound(DvcException, KeyError):  # noqa: N818\n    def __init__(self, file, name):\n        self.file = file.relpath\n        self.name = name\n        super().__init__(f\"Stage '{self.name}' not found inside '{self.file}' file\")\n\n    def __str__(self):\n        # `KeyError` quotes the message\n        # see: https://bugs.python.org/issue2651\n        return self.msg\n\n\nclass StageNameUnspecified(DvcException):\n    def __init__(self, file):\n        super().__init__(\n            \"Stage name not provided. \"\n            f\"Please specify the name as: `{file.relpath}:stage_name`\"\n        )\n\n\nclass DuplicateStageName(DvcException):\n    pass\n\n\nclass InvalidStageName(DvcException):\n    def __init__(self):\n        super().__init__(\"Stage name cannot contain punctuation characters.\")\n"
  },
  {
    "path": "dvc/stage/imports.py",
    "content": "from dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\ndef _update_import_on_remote(stage, remote, jobs):\n    if stage.is_repo_import:\n        raise InvalidArgumentError(\n            \"Data imported from other DVC or Git repositories can't \"\n            \"be updated with --to-remote\"\n        )\n\n    stage.save_deps()\n    url = stage.deps[0].def_path\n    odb = stage.repo.cloud.get_remote_odb(remote, \"update\")\n    stage.outs[0].transfer(url, odb=odb, jobs=jobs, update=True)\n\n\ndef update_import(\n    stage,\n    rev=None,\n    to_remote=False,\n    remote=None,\n    no_download=None,\n    jobs=None,\n    force=False,\n):\n    stage.deps[0].update(rev=rev)\n\n    frozen = stage.frozen\n    stage.frozen = False\n    changed = stage.changed()\n\n    try:\n        if to_remote:\n            _update_import_on_remote(stage, remote, jobs)\n        else:\n            stage.reproduce(no_download=no_download, jobs=jobs, force=force)\n    finally:\n        if no_download and changed:\n            # Avoid retaining stale information\n            stage.outs[0].clear()\n        stage.frozen = frozen\n\n\ndef sync_import(stage, dry=False, force=False, jobs=None, no_download=False):\n    \"\"\"Synchronize import's outs to the workspace.\"\"\"\n    logger.info(\"Importing '%s' -> '%s'\", stage.deps[0], stage.outs[0])\n    if dry:\n        return\n\n    if not force and stage.already_cached():\n        stage.outs[0].checkout()\n    else:\n        stage.save_deps()\n        if no_download:\n            if stage.is_repo_import or stage.is_db_import:\n                stage.deps[0].update()\n        else:\n            stage.deps[0].download(stage.outs[0], jobs=jobs)\n"
  },
  {
    "path": "dvc/stage/loader.py",
    "content": "from collections.abc import Mapping\nfrom copy import deepcopy\nfrom itertools import chain\nfrom typing import TYPE_CHECKING, Any, Optional\n\nfrom funcy import get_in, lcat, once, project\n\nfrom dvc import dependency, output\nfrom dvc.log import logger\nfrom dvc.parsing import FOREACH_KWD, JOIN, MATRIX_KWD, EntryNotFound\nfrom dvc.utils.objects import cached_property\nfrom dvc_data.hashfile.meta import Meta\n\nfrom . import PipelineStage, Stage, loads_from\nfrom .exceptions import StageNameUnspecified, StageNotFound\nfrom .params import StageParams\nfrom .utils import fill_stage_dependencies, resolve_paths\n\nif TYPE_CHECKING:\n    from collections.abc import Iterable\n\n    from dvc.dvcfile import ProjectFile, SingleStageFile\n\nlogger = logger.getChild(__name__)\n\n\nclass StageLoader(Mapping):\n    def __init__(self, dvcfile: \"ProjectFile\", data, lockfile_data=None):\n        self.dvcfile = dvcfile\n        self.resolver = self.dvcfile.resolver\n        self.data = data or {}\n        self.stages_data = self.data.get(\"stages\", {})\n        self.repo = self.dvcfile.repo\n\n        lockfile_data = lockfile_data or {}\n        self._lockfile_data = lockfile_data.get(\"stages\", {})\n\n    @cached_property\n    def lockfile_data(self) -> dict[str, Any]:\n        if not self._lockfile_data:\n            logger.debug(\"Lockfile for '%s' not found\", self.dvcfile.relpath)\n        return self._lockfile_data\n\n    @staticmethod\n    def fill_from_lock(stage, lock_data=None):\n        \"\"\"Fill values for params, checksums for outs and deps from lock.\"\"\"\n        if not lock_data:\n            return\n\n        from dvc.output import Output, merge_file_meta_from_cloud\n\n        assert isinstance(lock_data, dict)\n        items: Iterable[tuple[str, Output]] = chain(\n            ((StageParams.PARAM_DEPS, dep) for dep in stage.deps),\n            ((StageParams.PARAM_OUTS, out) for out in stage.outs),\n        )\n\n        checksums = {\n        
    key: {item[\"path\"]: item for item in lock_data.get(key, {})}\n            for key in [StageParams.PARAM_DEPS, StageParams.PARAM_OUTS]\n        }\n        for key, item in items:\n            path = item.def_path\n            if isinstance(item, dependency.ParamsDependency):\n                item.fill_values(get_in(lock_data, [stage.PARAM_PARAMS, path]))\n                continue\n            info = get_in(checksums, [key, path], {})\n            info = info.copy()\n            info.pop(\"path\", None)\n\n            if isinstance(item, dependency.DatasetDependency):\n                item.fill_values(info.get(dependency.DatasetDependency.PARAM_DATASET))\n                continue\n\n            hash_name = info.pop(Output.PARAM_HASH, None)\n            item.meta = Meta.from_dict(merge_file_meta_from_cloud(info))\n            item.hash_name, item.hash_info = item._compute_hash_info_from_meta(\n                hash_name\n            )\n            files = get_in(checksums, [key, path, item.PARAM_FILES], None)\n            if files:\n                item.files = [merge_file_meta_from_cloud(f) for f in files]\n            item._compute_meta_hash_info_from_files()\n\n    @classmethod\n    def load_stage(cls, dvcfile: \"ProjectFile\", name, stage_data, lock_data=None):\n        assert all([name, dvcfile, dvcfile.repo, dvcfile.path])\n        assert stage_data\n        assert isinstance(stage_data, dict)\n\n        path, wdir = resolve_paths(\n            dvcfile.repo.fs, dvcfile.path, stage_data.get(Stage.PARAM_WDIR)\n        )\n        stage = loads_from(PipelineStage, dvcfile.repo, path, wdir, stage_data)\n        stage.name = name\n        stage.desc = stage_data.get(Stage.PARAM_DESC)\n        stage.meta = stage_data.get(Stage.PARAM_META)\n\n        deps = project(stage_data, [stage.PARAM_DEPS, stage.PARAM_PARAMS])\n        fill_stage_dependencies(stage, **deps)\n\n        outs = project(\n            stage_data,\n            [\n                stage.PARAM_OUTS,\n 
               stage.PARAM_METRICS,\n                stage.PARAM_PLOTS,\n            ],\n        )\n        stage.outs = lcat(\n            output.load_from_pipeline(stage, data, typ=key)\n            for key, data in outs.items()\n        )\n\n        if lock_data:\n            stage.cmd_changed = lock_data.get(Stage.PARAM_CMD) != stage.cmd\n\n        cls.fill_from_lock(stage, lock_data)\n        return stage\n\n    @once\n    def lockfile_needs_update(self):\n        # if lockfile does not have all of the entries that dvc.yaml says it\n        # should have, provide a debug message once\n\n        lockfile = self.dvcfile._lockfile.relpath\n        logger.debug(\"Lockfile '%s' needs to be updated.\", lockfile)\n\n    def __getitem__(self, name):\n        if not name:\n            raise StageNameUnspecified(self.dvcfile)\n\n        try:\n            resolved_data = self.resolver.resolve_one(name)\n        except EntryNotFound:\n            raise StageNotFound(self.dvcfile, name)  # noqa: B904\n\n        if self.lockfile_data and name not in self.lockfile_data:\n            self.lockfile_needs_update()\n            logger.trace(\"No lock entry found for '%s:%s'\", self.dvcfile.relpath, name)\n\n        resolved_stage = resolved_data[name]\n        stage = self.load_stage(\n            self.dvcfile,\n            name,\n            resolved_stage,\n            self.lockfile_data.get(name, {}),\n        )\n\n        stage.tracked_vars = self.resolver.tracked_vars.get(name, {})\n        group, *keys = name.rsplit(JOIN, maxsplit=1)\n        if group and keys and name not in self.stages_data:\n            stage.raw_data.generated_from = group\n\n        stage.raw_data.parametrized = self.stages_data.get(name, {}) != resolved_stage\n        return stage\n\n    def __iter__(self):\n        return iter(self.resolver.get_keys())\n\n    def __len__(self):\n        return len(self.resolver.get_keys())\n\n    def __contains__(self, name):\n        return 
self.resolver.has_key(name)\n\n    def is_foreach_or_matrix_generated(self, name: str) -> bool:\n        return (\n            name in self.stages_data\n            and {FOREACH_KWD, MATRIX_KWD} & self.stages_data[name].keys()\n        )\n\n\nclass SingleStageLoader(Mapping):\n    def __init__(\n        self,\n        dvcfile: \"SingleStageFile\",\n        stage_data: dict[Any, str],\n        stage_text: Optional[str] = None,\n    ):\n        self.dvcfile = dvcfile\n        self.stage_data = stage_data or {}\n        self.stage_text = stage_text\n\n    def __getitem__(self, item):\n        if item:\n            logger.warning(\n                \"Ignoring name '%s' for single stage in '%s'.\", item, self.dvcfile\n            )\n        # during `load`, we remove attributes from stage data, so as to\n        # not duplicate, therefore, for MappingView, we need to deepcopy.\n        return self.load_stage(self.dvcfile, deepcopy(self.stage_data), self.stage_text)\n\n    @classmethod\n    def load_stage(\n        cls,\n        dvcfile: \"SingleStageFile\",\n        d: dict[str, Any],\n        stage_text: Optional[str],\n    ) -> Stage:\n        path, wdir = resolve_paths(\n            dvcfile.repo.fs, dvcfile.path, d.get(Stage.PARAM_WDIR)\n        )\n        stage = loads_from(Stage, dvcfile.repo, path, wdir, d)\n        stage._stage_text = stage_text\n        stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS) or [])\n        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS) or [])\n        return stage\n\n    def __iter__(self):\n        return iter([None])\n\n    def __contains__(self, item):\n        return False\n\n    def __len__(self):\n        return 1\n"
  },
  {
    "path": "dvc/stage/params.py",
    "content": "class StageParams:\n    PARAM_MD5 = \"md5\"\n    PARAM_CMD = \"cmd\"\n    PARAM_WDIR = \"wdir\"\n    PARAM_DEPS = \"deps\"\n    PARAM_OUTS = \"outs\"\n    PARAM_LOCKED = \"locked\"  # backward compatibility\n    PARAM_FROZEN = \"frozen\"\n    PARAM_META = \"meta\"\n    PARAM_ALWAYS_CHANGED = \"always_changed\"\n    PARAM_PARAMS = \"params\"\n    PARAM_METRICS = \"metrics\"\n    PARAM_PLOTS = \"plots\"\n    PARAM_DESC = \"desc\"\n"
  },
  {
    "path": "dvc/stage/run.py",
    "content": "import os\nimport signal\nimport subprocess\nimport threading\nfrom functools import cache\n\nfrom packaging.version import InvalidVersion, Version\n\nfrom dvc.log import logger\nfrom dvc.utils import fix_env\n\nfrom .decorators import unlocked_repo\nfrom .exceptions import StageCmdFailedError\n\nlogger = logger.getChild(__name__)\n\n\n@cache\ndef _fish_supports_no_config(executable) -> bool:\n    \"\"\"\n    Check if the fish shell supports the --no-config option.\n\n    Parameters:\n    executable (str): The path to the fish shell executable.\n\n    Returns:\n    bool: True if the fish version is greater than 3.3.0, False otherwise.\n    \"\"\"\n    try:\n        output = subprocess.check_output(  # noqa: S603\n            [executable, \"--version\"],\n            text=True,\n        )\n        version = Version(output.split(\" \")[-1].strip())\n        version_to_check = Version(\"3.3.0\")\n        return version >= version_to_check\n    except (subprocess.CalledProcessError, IndexError, InvalidVersion):\n        logger.trace(\"could not check fish version, defaulting to False\")\n        return False\n\n\ndef _warn_if_fish(executable):\n    if (\n        executable is None\n        or os.path.basename(executable) != \"fish\"\n        or _fish_supports_no_config(executable)\n    ):\n        return\n    logger.warning(\n        \"DVC detected that you are using a version of fish shell below 3.3.0 \"\n        \"Be aware that it might cause problems by overwriting \"\n        \"your current environment variables with values defined \"\n        \"in 'config.fish', which might affect your command. See \"\n        \"https://github.com/treeverse/dvc/issues/1307. 
\"\n    )\n\n\ndef _make_cmd(executable, cmd):\n    if executable is None:\n        return cmd\n    opts = {\n        \"zsh\": [\"--no-rcs\"],\n        \"bash\": [\"--noprofile\", \"--norc\"],\n        \"fish\": [],\n    }\n    name = os.path.basename(executable).lower()\n    opt = opts.get(name, [])\n    if name == \"fish\" and _fish_supports_no_config(executable):\n        opt.append(\"--no-config\")\n    return [executable, *opt, \"-c\", cmd]\n\n\ndef _enforce_cmd_list(cmd):\n    assert cmd\n    return cmd if isinstance(cmd, list) else cmd.splitlines()\n\n\ndef prepare_kwargs(stage, run_env=None):\n    from dvc.env import DVC_ROOT, DVC_STAGE\n\n    kwargs = {\"cwd\": stage.wdir, \"env\": fix_env(None), \"close_fds\": True}\n\n    if run_env:\n        kwargs[\"env\"].update(run_env)\n    if DVC_ROOT not in kwargs[\"env\"]:\n        kwargs[\"env\"][DVC_ROOT] = stage.repo.root_dir\n\n    # Create DVC_STAGE env variable for every command\n    kwargs[\"env\"][DVC_STAGE] = stage.addressing\n\n    # NOTE: when you specify `shell=True`, `Popen` [1] will default to\n    # `/bin/sh` on *nix and will add [\"/bin/sh\", \"-c\"] to your command.\n    # But we actually want to run the same shell that we are running\n    # from right now, which is usually determined by the `SHELL` env\n    # var. So instead, we compose our command on our own, making sure\n    # to include special flags to prevent shell from reading any\n    # configs and modifying env, which may change the behavior or the\n    # command we are running. 
See [2] for more info.\n    #\n    # [1] https://github.com/python/cpython/blob/3.7/Lib/subprocess.py\n    #                                                            #L1426\n    # [2] https://github.com/treeverse/dvc/issues/2506\n    #                                           #issuecomment-535396799\n    kwargs[\"shell\"] = os.name == \"nt\"\n    return kwargs\n\n\ndef display_command(cmd):\n    logger.info(\"%s %s\", \">\", cmd)\n\n\ndef get_executable():\n    return (os.getenv(\"SHELL\") or \"/bin/sh\") if os.name != \"nt\" else None\n\n\ndef _run(executable, cmd, **kwargs):\n    main_thread = isinstance(\n        threading.current_thread(),\n        threading._MainThread,  # type: ignore[attr-defined]\n    )\n    old_handler = None\n\n    exec_cmd = _make_cmd(executable, cmd)\n\n    try:\n        p = subprocess.Popen(exec_cmd, **kwargs)  # noqa: S603\n        if main_thread:\n            old_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)\n\n        p.communicate()\n\n        if p.returncode != 0:\n            raise StageCmdFailedError(cmd, p.returncode)\n    finally:\n        if old_handler:\n            signal.signal(signal.SIGINT, old_handler)\n\n\ndef cmd_run(stage, dry=False, run_env=None):\n    logger.info(\"Running stage '%s':\", stage.addressing)\n    commands = _enforce_cmd_list(stage.cmd)\n    kwargs = prepare_kwargs(stage, run_env=run_env)\n    executable = get_executable()\n\n    if not dry:\n        _warn_if_fish(executable)\n\n    for cmd in commands:\n        display_command(cmd)\n        if dry:\n            continue\n\n        _run(executable, cmd, **kwargs)\n\n\ndef _pull_missing_deps(stage):\n    from dvc.dependency import DatasetDependency, DbDependency\n\n    for dep in stage.deps:\n        if isinstance(dep, (DatasetDependency, DbDependency)):\n            continue\n        if not dep.exists:\n            stage.repo.pull(dep.def_path)\n\n\ndef run_stage(stage, dry=False, force=False, run_env=None, **kwargs):\n    if not force:\n   
     if kwargs.get(\"pull\") and not dry:\n            _pull_missing_deps(stage)\n\n        from .cache import RunCacheNotFoundError\n\n        try:\n            stage.repo.stage_cache.restore(stage, dry=dry, **kwargs)\n            if not dry:\n                return\n        except RunCacheNotFoundError:\n            if not dry:\n                stage.save_deps()\n\n    run = cmd_run if dry else unlocked_repo(cmd_run)\n    run(stage, dry=dry, run_env=run_env)\n"
  },
  {
    "path": "dvc/stage/serialize.py",
    "content": "from collections import OrderedDict\nfrom collections.abc import Iterable\nfrom operator import attrgetter\nfrom typing import TYPE_CHECKING, Any, Optional, Union, no_type_check\n\nfrom funcy import post_processing\n\nfrom dvc.dependency import ParamsDependency\nfrom dvc.output import Annotation, Output\nfrom dvc.utils.collections import apply_diff\nfrom dvc.utils.serialize import parse_yaml_for_update\n\nfrom .params import StageParams\nfrom .utils import resolve_wdir, split_params_deps\n\nif TYPE_CHECKING:\n    from dvc.stage import PipelineStage, Stage\n\nPARAM_PARAMS = ParamsDependency.PARAM_PARAMS\nPARAM_PATH = ParamsDependency.PARAM_PATH\n\nPARAM_DEPS = StageParams.PARAM_DEPS\nPARAM_OUTS = StageParams.PARAM_OUTS\n\nPARAM_CACHE = Output.PARAM_CACHE\nPARAM_METRIC = Output.PARAM_METRIC\nPARAM_PLOT = Output.PARAM_PLOT\nPARAM_PERSIST = Output.PARAM_PERSIST\nPARAM_DESC = Annotation.PARAM_DESC\nPARAM_REMOTE = Output.PARAM_REMOTE\nPARAM_PUSH = Output.PARAM_PUSH\n\nDEFAULT_PARAMS_FILE = ParamsDependency.DEFAULT_PARAMS_FILE\n\n\n@post_processing(OrderedDict)\ndef _get_flags(out):\n    annot = out.annot.to_dict()\n    yield from annot.items()\n\n    if not out.use_cache:\n        yield PARAM_CACHE, False\n    if out.persist:\n        yield PARAM_PERSIST, True\n    if out.plot and isinstance(out.plot, dict):\n        # notice `out.plot` is not sorted\n        # `out.plot` is in the same order as is in the file when read\n        # and, should be dumped as-is without any sorting\n        yield from out.plot.items()\n    if out.remote:\n        yield PARAM_REMOTE, out.remote\n    if not out.can_push:\n        yield PARAM_PUSH, False\n\n\ndef _serialize_out(out):\n    flags = _get_flags(out)\n    return out.def_path if not flags else {out.def_path: flags}\n\n\n@no_type_check\ndef _serialize_outs(outputs: list[Output]):\n    outs, metrics, plots = [], [], []\n    for out in sorted(outputs, key=attrgetter(\"def_path\")):\n        bucket = outs\n        if 
out.plot:\n            bucket = plots\n        elif out.metric:\n            bucket = metrics\n        bucket.append(_serialize_out(out))\n    return outs, metrics, plots\n\n\ndef _serialize_params_keys(params: Iterable[\"ParamsDependency\"]):\n    \"\"\"\n    Returns the following format of data:\n     ['lr', 'train', {'params2.yaml': ['lr']}]\n\n    The output is sorted, with keys of params from the default params file coming\n    first, followed by entries for other files in lexicographic\n    order. The keys of those custom files are also sorted in the same order.\n    \"\"\"\n    keys: list[Union[str, dict[str, Optional[list[str]]]]] = []\n    for param_dep in sorted(params, key=attrgetter(\"def_path\")):\n        # when on no_exec, params are not filled and are saved as list\n        k: list[str] = sorted(param_dep.params)\n        if k and param_dep.def_path == DEFAULT_PARAMS_FILE:\n            keys = k + keys  # type: ignore[operator,assignment]\n        else:\n            keys.append({param_dep.def_path: k or None})\n    return keys\n\n\n@no_type_check\ndef _serialize_params_values(params: list[ParamsDependency]):\n    \"\"\"Returns output of following format, used for lockfile:\n        {'params.yaml': {'lr': '1', 'train': 2}, 'params2.yaml': {'lr': '1'}}\n\n    The default params file is always kept at the start, followed by others in\n    alphabetical order. 
The param values are sorted too(not recursively though)\n    \"\"\"\n    key_vals = OrderedDict()\n    for param_dep in sorted(params, key=attrgetter(\"def_path\")):\n        dump = param_dep.dumpd()\n        path, params = dump[PARAM_PATH], dump[PARAM_PARAMS]\n        if isinstance(params, dict):\n            kv = [(key, params[key]) for key in sorted(params.keys())]\n            key_vals[path] = OrderedDict(kv)\n            if path == DEFAULT_PARAMS_FILE:\n                key_vals.move_to_end(path, last=False)\n    return key_vals\n\n\ndef to_pipeline_file(stage: \"PipelineStage\"):\n    wdir = resolve_wdir(stage.wdir, stage.path)\n    param_objs, deps_objs = split_params_deps(stage)\n    deps = sorted(d.def_path for d in deps_objs)\n    params = _serialize_params_keys(param_objs)\n\n    outs, metrics, plots = _serialize_outs(stage.outs)\n\n    cmd = stage.cmd\n    assert cmd, (\n        f\"'{stage.PARAM_CMD}' cannot be empty for stage '{stage.name}', \"\n        f\"got: '{cmd}'(type: '{type(cmd).__name__}')\"\n    )\n    res = [\n        (stage.PARAM_DESC, stage.desc),\n        (stage.PARAM_CMD, stage.cmd),\n        (stage.PARAM_WDIR, wdir),\n        (stage.PARAM_DEPS, deps),\n        (stage.PARAM_PARAMS, params),\n        (stage.PARAM_OUTS, outs),\n        (stage.PARAM_METRICS, metrics),\n        (stage.PARAM_PLOTS, plots),\n        (stage.PARAM_FROZEN, stage.frozen),\n        (stage.PARAM_ALWAYS_CHANGED, stage.always_changed),\n        (stage.PARAM_META, stage.meta),\n    ]\n    return {stage.name: OrderedDict([(key, value) for key, value in res if value])}\n\n\ndef to_single_stage_lockfile(stage: \"Stage\", **kwargs) -> dict:\n    from dvc.cachemgr import LEGACY_HASH_NAMES\n    from dvc.dependency import DatasetDependency\n    from dvc.output import (\n        _serialize_hi_to_dict,\n        _serialize_tree_obj_to_files,\n        split_file_meta_from_cloud,\n    )\n    from dvc_data.hashfile.tree import Tree\n\n    assert stage.cmd\n\n    def _dumpd(item: 
\"Output\"):\n        if isinstance(item, DatasetDependency):\n            return item.dumpd()\n\n        ret: dict[str, Any] = {item.PARAM_PATH: item.def_path}\n        if item.hash_name not in LEGACY_HASH_NAMES:\n            ret[item.PARAM_HASH] = \"md5\"\n        if item.hash_info.isdir and kwargs.get(\"with_files\"):\n            obj = item.obj or item.get_obj()\n            if obj:\n                assert isinstance(obj, Tree)\n                ret[item.PARAM_FILES] = [\n                    split_file_meta_from_cloud(f)\n                    for f in _serialize_tree_obj_to_files(obj)\n                ]\n        else:\n            assert item.meta is not None\n            meta_d = item.meta.to_dict()\n            meta_d.pop(\"isdir\", None)\n            ret.update(_serialize_hi_to_dict(item.hash_info))\n            ret.update(split_file_meta_from_cloud(meta_d))\n        return ret\n\n    res = OrderedDict([(\"cmd\", stage.cmd)])\n    params, deps = split_params_deps(stage)\n    deps, outs = (\n        [_dumpd(item) for item in sorted(items, key=attrgetter(\"def_path\"))]  # type: ignore[call-overload]\n        for items in [deps, stage.outs]\n    )\n    params = _serialize_params_values(params)\n    if deps:\n        res[PARAM_DEPS] = deps\n    if params:\n        res[PARAM_PARAMS] = params\n    if outs:\n        res[PARAM_OUTS] = outs\n\n    return res\n\n\ndef to_lockfile(stage: \"PipelineStage\", **kwargs) -> dict:\n    assert stage.name\n    return {stage.name: to_single_stage_lockfile(stage, **kwargs)}\n\n\ndef to_single_stage_file(stage: \"Stage\", **kwargs):\n    state = stage.dumpd(**kwargs)\n\n    # When we load a stage we parse yaml with a fast parser, which strips\n    # off all the comments and formatting. 
To retain those on update we do\n    # a trick here:\n    # - reparse the same yaml text with a slow but smart ruamel yaml parser\n    # - apply changes to a returned structure\n    # - serialize it\n    text = stage._stage_text\n    if text is None:\n        return state\n\n    saved_state = parse_yaml_for_update(text, stage.path)\n    apply_diff(state, saved_state)\n    return saved_state\n"
  },
  {
    "path": "dvc/stage/utils.py",
    "content": "import os\nimport pathlib\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nfrom funcy import concat, first, lsplit, rpartial\n\nfrom dvc.annotations import ANNOTATION_FIELDS\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc_data.hashfile.meta import Meta\n\nfrom .exceptions import (\n    MissingDataSource,\n    StageExternalOutputsError,\n    StagePathNotDirectoryError,\n    StagePathNotFoundError,\n    StagePathOutsideError,\n)\n\nif TYPE_CHECKING:\n    from dvc.dependency import Dependency, ParamsDependency\n    from dvc.repo import Repo\n\n    from . import PipelineStage, Stage\n\n\ndef check_stage_path(repo, path, is_wdir=False):\n    from dvc.utils.fs import path_isin\n\n    assert repo is not None\n\n    error_msg = \"{wdir_or_path} '{path}' {{}}\".format(\n        wdir_or_path=\"stage working dir\" if is_wdir else \"file path\", path=path\n    )\n\n    real_path = os.path.abspath(path)\n    if not os.path.exists(real_path):\n        raise StagePathNotFoundError(error_msg.format(\"does not exist\"))\n\n    if not os.path.isdir(real_path):\n        raise StagePathNotDirectoryError(error_msg.format(\"is not directory\"))\n\n    proj_dir = os.path.abspath(repo.root_dir)\n    if real_path != proj_dir and not path_isin(real_path, proj_dir):\n        raise StagePathOutsideError(error_msg.format(\"is outside of DVC repo\"))\n\n\ndef fill_stage_outputs(stage, **kwargs):\n    from dvc.output import loads_from\n\n    assert not stage.outs\n\n    keys = [\n        \"outs_persist\",\n        \"outs_persist_no_cache\",\n        \"metrics\",\n        \"metrics_persist\",\n        \"metrics_no_cache\",\n        \"metrics_persist_no_cache\",\n        \"plots\",\n        \"plots_persist\",\n        \"plots_no_cache\",\n        \"plots_persist_no_cache\",\n        \"outs_no_cache\",\n        \"outs\",\n    ]\n\n    stage.outs = []\n\n    for key in keys:\n        stage.outs += loads_from(\n            stage,\n            kwargs.get(key, 
[]),\n            use_cache=\"no_cache\" not in key,\n            persist=\"persist\" in key,\n            metric=\"metrics\" in key,\n            plot=\"plots\" in key,\n        )\n\n\ndef fill_stage_dependencies(\n    stage, deps=None, erepo=None, params=None, fs_config=None, db=None\n):\n    from dvc.dependency import loads_from, loads_params\n\n    assert not stage.deps\n    stage.deps = []\n    stage.deps += loads_from(stage, deps or [], erepo=erepo, fs_config=fs_config, db=db)\n    stage.deps += loads_params(stage, params or [])\n\n\ndef check_no_externals(stage):\n    from dvc.utils import format_link\n\n    def _is_cached_external(out):\n        return not out.is_in_repo and out.use_cache\n\n    outs = [str(out) for out in stage.outs if _is_cached_external(out)]\n    if not outs:\n        return\n\n    str_outs = \", \".join(outs)\n    link = format_link(\n        \"https://dvc.org/doc/user-guide/pipelines/external-dependencies-and-outputs\"\n    )\n    if stage.is_data_source:\n        link = format_link(\n            \"https://dvc.org/doc/user-guide/data-management/importing-external-data\"\n        )\n    raise StageExternalOutputsError(\n        f\"Cached output(s) outside of DVC project: {str_outs}. 
\"\n        f\"See {link} for more info.\"\n    )\n\n\ndef check_circular_dependency(stage):\n    from dvc.exceptions import CircularDependencyError\n\n    circular_dependencies = {d.fs_path for d in stage.deps} & {\n        o.fs_path for o in stage.outs\n    }\n\n    if circular_dependencies:\n        raise CircularDependencyError(str(circular_dependencies.pop()))\n\n\ndef check_duplicated_arguments(stage):\n    from collections import Counter\n\n    from dvc.exceptions import ArgumentDuplicationError\n\n    path_counts = Counter(edge.fs_path for edge in stage.deps + stage.outs)\n\n    for path, occurrence in path_counts.items():\n        if occurrence > 1:\n            raise ArgumentDuplicationError(str(path))\n\n\ndef check_missing_outputs(stage):\n    paths = [str(out) for out in stage.outs if not out.exists]\n    if paths:\n        raise MissingDataSource(paths)\n\n\ndef compute_md5(stage):\n    from dvc.output import Output\n    from dvc.utils import dict_md5\n\n    d = stage.dumpd()\n\n    # Remove md5 and meta, these should not affect stage md5\n    d.pop(stage.PARAM_MD5, None)\n    d.pop(stage.PARAM_META, None)\n    d.pop(stage.PARAM_DESC, None)\n\n    # Ignore the wdir default value. 
In this case DVC file w/o\n    # wdir has the same md5 as a file with the default value specified.\n    # It's important for backward compatibility with pipelines that\n    # didn't have WDIR in their DVC files.\n    if d.get(stage.PARAM_WDIR) == \".\":\n        del d[stage.PARAM_WDIR]\n\n    return dict_md5(\n        d,\n        exclude=[\n            *ANNOTATION_FIELDS,\n            stage.PARAM_LOCKED,  # backward compatibility\n            stage.PARAM_FROZEN,\n            Output.PARAM_METRIC,\n            Output.PARAM_PERSIST,\n            Meta.PARAM_ISEXEC,\n            Meta.PARAM_SIZE,\n            Meta.PARAM_NFILES,\n        ],\n    )\n\n\ndef resolve_wdir(wdir, path):\n    from dvc.utils import relpath\n\n    rel_wdir = relpath(wdir, os.path.dirname(path))\n    return pathlib.PurePath(rel_wdir).as_posix() if rel_wdir != \".\" else None\n\n\ndef resolve_paths(fs, path, wdir=None):\n    path = fs.abspath(path)\n    wdir = wdir or os.curdir\n    wdir = fs.abspath(fs.join(fs.dirname(path), wdir))\n    return path, wdir\n\n\ndef get_dump(stage: \"Stage\", **kwargs):\n    return {\n        key: value\n        for key, value in {\n            stage.PARAM_DESC: stage.desc,\n            stage.PARAM_MD5: stage.md5,\n            stage.PARAM_CMD: stage.cmd,\n            stage.PARAM_WDIR: resolve_wdir(stage.wdir, stage.path),\n            stage.PARAM_FROZEN: stage.frozen,\n            stage.PARAM_DEPS: [d.dumpd(**kwargs) for d in stage.deps],\n            stage.PARAM_OUTS: [o.dumpd(**kwargs) for o in stage.outs],\n            stage.PARAM_ALWAYS_CHANGED: stage.always_changed,\n            stage.PARAM_META: stage.meta,\n        }.items()\n        if value\n    }\n\n\ndef split_params_deps(\n    stage: \"Stage\",\n) -> tuple[list[\"ParamsDependency\"], list[\"Dependency\"]]:\n    from dvc.dependency import ParamsDependency\n\n    return lsplit(rpartial(isinstance, ParamsDependency), stage.deps)\n\n\ndef is_valid_name(name: str) -> bool:\n    from . 
import INVALID_STAGENAME_CHARS\n\n    return not INVALID_STAGENAME_CHARS & set(name)\n\n\ndef prepare_file_path(kwargs) -> str:\n    \"\"\"Determine file path from the first output name.\n\n    Used in creating .dvc files.\n    \"\"\"\n    from dvc.dvcfile import DVC_FILE_SUFFIX\n\n    out = first(\n        concat(\n            kwargs.get(\"outs\", []),\n            kwargs.get(\"outs_no_cache\", []),\n            kwargs.get(\"metrics\", []),\n            kwargs.get(\"metrics_no_cache\", []),\n            kwargs.get(\"plots\", []),\n            kwargs.get(\"plots_no_cache\", []),\n            kwargs.get(\"outs_persist\", []),\n            kwargs.get(\"outs_persist_no_cache\", []),\n        )\n    )\n    assert out\n    return os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX\n\n\ndef check_stage_exists(repo: \"Repo\", stage: Union[\"Stage\", \"PipelineStage\"], path: str):\n    from dvc.dvcfile import load_file\n    from dvc.stage import PipelineStage\n    from dvc.stage.exceptions import DuplicateStageName, StageFileAlreadyExistsError\n\n    dvcfile = load_file(repo, path)\n    if not dvcfile.exists():\n        return\n\n    hint = \"Use '--force' to overwrite.\"\n    if not isinstance(stage, PipelineStage):\n        raise StageFileAlreadyExistsError(f\"'{stage.relpath}' already exists. {hint}\")\n    if stage.name and stage.name in dvcfile.stages:\n        raise DuplicateStageName(\n            f\"Stage '{stage.name}' already exists in '{stage.relpath}'. 
{hint}\"\n        )\n\n\ndef validate_kwargs(\n    single_stage: bool = False, fname: Optional[str] = None, **kwargs\n) -> dict[str, Any]:\n    \"\"\"Prepare, validate and process kwargs passed from cli\"\"\"\n    cmd = kwargs.get(\"cmd\")\n    if not cmd and not single_stage:\n        raise InvalidArgumentError(\"command is not specified\")\n\n    stage_name = kwargs.get(\"name\")\n    if stage_name and single_stage:\n        raise InvalidArgumentError(\"`-n|--name` is incompatible with `--single-stage`\")\n    if stage_name and fname:\n        raise InvalidArgumentError(\n            \"`--file` is currently incompatible with `-n|--name` \"\n            \"and requires `--single-stage`\"\n        )\n    if not stage_name and not single_stage:\n        raise InvalidArgumentError(\"`-n|--name` is required\")\n\n    if single_stage:\n        kwargs.pop(\"name\", None)\n\n    return kwargs\n\n\ndef _get_stage_files(stage: \"Stage\") -> list[str]:\n    from dvc.dvcfile import ProjectFile\n    from dvc.utils import relpath\n\n    ret: list[str] = []\n    file = stage.dvcfile\n    ret.append(file.relpath)\n    if isinstance(file, ProjectFile):\n        ret.append(file._lockfile.relpath)\n\n    for dep in stage.deps:\n        if (\n            not dep.use_scm_ignore\n            and dep.is_in_repo\n            and not stage.repo.dvcfs.isdvc(stage.repo.dvcfs.from_os_path(str(dep)))\n        ):\n            ret.append(relpath(dep.fs_path))  # noqa: PERF401\n\n    for out in stage.outs:\n        if not out.use_scm_ignore and out.is_in_repo:\n            ret.append(relpath(out.fs_path))  # noqa: PERF401\n    return ret\n"
  },
  {
    "path": "dvc/testing/README.rst",
    "content": "DVC pytest plugin\n\ndvc.testing.benchmarks\n======================\n\nBenchmark test definitions are now part of ``dvc.testing``.\nFor CLI usage and `bench.dvc.org <https://bench.dvc.org>`_ details see `dvc-bench <https://github.com/treeverse/dvc-bench>`_.\n\n``dvc.testing.benchmarks`` structure:\n\n* cli: should be able to run these with any dvc (rpm, deb, pypi, snap, etc.) (could be used in dvc-test repo too)\n\n  * commands: granular tests for individual commands. These should have a cached setup, so that we could use them during rapid development instead of our hand-written scripts. Every test could be run on a separate machine.\n  * stories: multistage start-to-end benchmarks, useful for testing workflows (e.g. in documentation, see test_sharing inspired by `Storing and sharing <https://dvc.org/doc/start/data-management/data-versioning#storing-and-sharing>`_). Every full story could be run on a separate machine.\n\n* api: for python api only.\n\n  * methods: granular tests for individual methods (e.g. ``api.open/read``). Same reasoning as in ``cli.commands``.\n  * stories: same as ``cli.stories`` but for our api. E.g. imagine using our api with pandas or something like that.\n"
  },
  {
    "path": "dvc/testing/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/testing/api_tests.py",
    "content": "import pytest\n\nfrom dvc import api\nfrom dvc.api import DVCFileSystem\nfrom dvc.testing import matchers as M\nfrom dvc.utils.fs import remove\n\n\nclass TestAPI:\n    def test_get_url(self, tmp_dir, dvc, remote):\n        tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n        expected_url = (\n            remote / \"files\" / \"md5\" / \"ac/bd18db4cc2f85cedef654fccc4a4d8\"\n        ).url\n        assert api.get_url(\"foo\") == expected_url\n\n    def test_open(self, tmp_dir, dvc, remote):\n        tmp_dir.dvc_gen({\"foo\": \"foo-text\", \"dir\": {\"bar\": \"bar-text\"}})\n        dvc.push()\n\n        # Remove cache to force download\n        remove(dvc.cache.local.path)\n\n        with api.open(\"foo\") as fobj:\n            assert fobj.read() == \"foo-text\"\n\n        with api.open(\"dir/bar\") as fobj:\n            assert fobj.read() == \"bar-text\"\n\n    @pytest.mark.parametrize(\"clear_cache\", [True, False], ids=[\"cache\", \"no_cache\"])\n    @pytest.mark.parametrize(\n        \"fs_kwargs\",\n        [\n            {},\n            {\"repo\": \"{path}\"},\n            {\"repo\": \"{path}\", \"rev\": \"{default_branch}\"},\n            {\"repo\": \"file://{posixpath}\"},\n            {\"repo\": \"file://{posixpath}\", \"rev\": \"{default_branch}\"},\n            {\"url\": \"{path}\"},  # test for backward compatibility\n        ],\n        ids=[\"current\", \"local\", \"local_rev\", \"git\", \"git_rev\", \"local_url\"],\n    )\n    def test_filesystem(\n        self,\n        tmp_dir,\n        make_tmp_dir,\n        scm,\n        dvc,\n        remote,\n        fs_kwargs,\n        clear_cache,\n    ):\n        fs_kwargs = fs_kwargs.copy()\n\n        tmp_dir.scm_gen({\"scripts\": {\"script1\": \"script1\"}}, commit=\"scripts\")\n        tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, commit=\"data\")\n        dvc.push()\n\n        if clear_cache:\n            remove(dvc.cache.repo.path)\n\n        if repo := 
fs_kwargs.get(\"repo\"):\n            fs_kwargs[\"repo\"] = repo.format(path=tmp_dir, posixpath=tmp_dir.as_posix())\n        if url := fs_kwargs.get(\"url\"):\n            fs_kwargs[\"url\"] = url.format(path=tmp_dir, posixpath=tmp_dir.as_posix())\n        if rev := fs_kwargs.get(\"rev\"):\n            fs_kwargs[\"rev\"] = rev.format(default_branch=scm.active_branch())\n\n        fs = DVCFileSystem(**fs_kwargs)\n\n        assert fs.ls(\"/\", detail=False) == M.unordered(\n            \"/.gitignore\", \"/scripts\", \"/data\"\n        )\n        assert fs.ls(\"scripts\", detail=False) == [\"scripts/script1\"]\n        assert fs.ls(\"data\", detail=False) == M.unordered(\"data/foo\", \"data/bar\")\n\n        data_info = M.dict(\n            name=\"/data\",\n            type=\"directory\",\n            dvc_info=M.dict(isdvc=True, isout=True),\n        )\n        scripts_info = M.dict(name=\"/scripts\", type=\"directory\", isexec=False)\n\n        assert sorted(fs.ls(\"/\"), key=lambda i: i[\"name\"]) == [\n            M.dict(name=\"/.gitignore\", type=\"file\", isexec=False),\n            data_info,\n            scripts_info,\n        ]\n\n        with pytest.raises(FileNotFoundError):\n            fs.info(\"/not-existing-path\")\n\n        assert fs.info(\"/\") == M.dict(name=\"/\", isexec=False, type=\"directory\")\n        assert fs.info(\"/data\") == data_info\n        assert fs.info(\"/scripts\") == scripts_info\n        assert fs.info(\"/data/foo\") == M.dict(name=\"/data/foo\", type=\"file\")\n        assert fs.info(\"/scripts/script1\") == M.dict(\n            name=\"/scripts/script1\", type=\"file\"\n        )\n\n        assert not fs.isdvc(\"/\")\n        assert fs.isdvc(\"/data\")\n        assert fs.isdvc(\"/data/foo\")\n        assert not fs.isdvc(\"/scripts\")\n        assert not fs.isdvc(\"/scripts/script1\")\n\n        with pytest.raises((IsADirectoryError, PermissionError)):\n            fs.open(\"data\")\n        with pytest.raises((IsADirectoryError, 
PermissionError)):\n            fs.open(\"scripts\")\n        with fs.open(\"/data/foo\") as fobj:\n            assert fobj.read() == b\"foo\"\n        with fs.open(\"/scripts/script1\") as fobj:\n            assert fobj.read() == b\"script1\"\n\n        tmp = make_tmp_dir(\"temp-download\")\n        fs.get_file(\"data/foo\", (tmp / \"foo\").fs_path)\n        assert (tmp / \"foo\").read_text() == \"foo\"\n\n        fs.get_file(\"scripts/script1\", (tmp / \"script1\").fs_path)\n        assert (tmp / \"script1\").read_text() == \"script1\"\n\n        fs.get(\"/\", (tmp / \"all\").fs_path, recursive=True)\n        assert (tmp / \"all\").read_text() == {\n            \".gitignore\": \"/data\\n\",\n            \"data\": {\"bar\": \"bar\", \"foo\": \"foo\"},\n            \"scripts\": {\"script1\": \"script1\"},\n        }\n"
  },
  {
    "path": "dvc/testing/benchmarks/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/testing/benchmarks/api/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/testing/benchmarks/cli/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_add.py",
    "content": "from .test_checkout import _skip_unsupported_link\n\n\ndef generate_test(*, link_type=\"copy\"):\n    def _test_add(bench_dvc, tmp_dir, dvc, dataset):\n        _skip_unsupported_link((tmp_dir / \".dvc\" / \"cache\"), tmp_dir, link_type)\n\n        with dvc.config.edit() as conf:\n            conf[\"cache\"][\"type\"] = link_type\n\n        bench_dvc(\"add\", dataset)\n        bench_dvc(\"add\", dataset, name=\"noop\")\n\n    return _test_add\n\n\ntest_add_copy = generate_test(link_type=\"copy\")\ntest_add_symlink = generate_test(link_type=\"symlink\")\ntest_add_hardlink = generate_test(link_type=\"hardlink\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_checkout.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.fs import localfs\nfrom dvc_objects.fs import generic\nfrom dvc_objects.fs.utils import tmp_fname\n\n\ndef _skip_unsupported_link(src, dest, link_type):\n    src_test_file = os.path.join(src, tmp_fname())\n    dest_test_file = os.path.join(dest, tmp_fname())\n    if not generic.test_links(\n        [link_type], localfs, src_test_file, localfs, dest_test_file\n    ):\n        pytest.skip(f\"{link_type} not supported\")\n\n\ndef generate_test(*, link_type=\"copy\"):\n    def _test_checkout_func(bench_dvc, tmp_dir, dvc, make_dataset):\n        dataset = make_dataset(dvcfile=True, cache=True, files=False)\n\n        _skip_unsupported_link((tmp_dir / \".dvc\" / \"cache\"), tmp_dir, link_type)\n\n        with dvc.config.edit() as conf:\n            conf[\"cache\"][\"type\"] = link_type\n\n        bench_dvc(\"checkout\", dataset)\n        bench_dvc(\"checkout\", name=\"noop\")\n        (dataset / \"new\").write_text(\"new\")\n        bench_dvc(\"checkout\", \"--force\", name=\"update\")\n\n    return _test_checkout_func\n\n\ntest_checkout_copy = generate_test(link_type=\"copy\")\ntest_checkout_symlink = generate_test(link_type=\"symlink\")\ntest_checkout_hardlink = generate_test(link_type=\"hardlink\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_data_status.py",
    "content": "from shutil import rmtree\n\n\ndef test_data_status(bench_dvc, tmp_dir, scm, dvc, make_dataset):\n    args = (\"data\", \"status\")\n    dataset = make_dataset(cache=True, files=True, dvcfile=True, commit=False)\n    scm.ignore(dataset)\n    rmtree(dvc.tmp_dir)\n\n    bench_dvc(*args, name=\"new\")\n    bench_dvc(*args, name=\"noop\")\n\n    tmp_dir.scm_add(\n        [dataset.with_suffix(\".dvc\").name, \".gitignore\"], commit=\"add dataset\"\n    )\n\n    (dataset / \"new\").write_text(\"new\")\n    bench_dvc(*args, name=\"changed\")\n    bench_dvc(*args, name=\"changed-noop\")\n\n\ndef test_data_status_all_flags(bench_dvc, tmp_dir, scm, dvc, make_dataset):\n    args = (\n        \"data\",\n        \"status\",\n        \"--granular\",\n        \"--unchanged\",\n        \"--untracked-files\",\n        \"--json\",\n    )\n    dataset = make_dataset(cache=True, files=True, dvcfile=True, commit=False)\n    scm.ignore(dataset)\n    rmtree(dvc.tmp_dir)\n\n    bench_dvc(*args, name=\"new\")\n    bench_dvc(*args, name=\"noop\")\n\n    tmp_dir.scm_add(\n        [dataset.with_suffix(\".dvc\").name, \".gitignore\"], commit=\"add dataset\"\n    )\n\n    (dataset / \"new\").write_text(\"new\")\n    bench_dvc(*args, name=\"changed\")\n    bench_dvc(*args, name=\"changed-noop\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_diff.py",
    "content": "def test_diff(bench_dvc, tmp_dir, scm, dvc, make_dataset):\n    dataset = make_dataset(cache=True, files=True, dvcfile=True, commit=True)\n    bench_dvc(\"diff\")\n    bench_dvc(\"diff\", name=\"noop\")\n\n    (dataset / \"new\").write_text(\"new\")\n    bench_dvc(\"diff\", name=\"changed\")\n    bench_dvc(\"diff\", name=\"changed-noop\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_exp_show.py",
    "content": "def test_exp_show(make_project, monkeypatch, bench_dvc, dvc_bin):\n    url = \"https://github.com/iterative/example-get-started\"\n    rev = \"main\"\n    path = make_project(url, rev=rev)\n    monkeypatch.chdir(path)\n\n    dvc_bin(\"exp\", \"pull\", \"-A\", \"--no-cache\", \"origin\")\n\n    bench_dvc(\"exp\", \"show\", \"-A\", \"--no-pager\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_fetch.py",
    "content": "def test_fetch(bench_dvc, tmp_dir, dvc, make_dataset, remote):\n    make_dataset(cache=False, dvcfile=True, files=False, remote=True)\n    bench_dvc(\"fetch\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_gc.py",
    "content": "def test_gc(bench_dvc, tmp_dir, dvc, make_dataset):\n    make_dataset(files=False, cache=True, dvcfile=False)\n    bench_dvc(\"gc\", \"-f\", \"-w\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_get.py",
    "content": "import pytest\n\n\n@pytest.mark.flaky(reruns=3)\ndef test_get(bench_dvc, tmp_dir, scm, dvc, make_dataset, remote):\n    dataset = make_dataset(\n        cache=False, files=False, dvcfile=True, commit=True, remote=True\n    )\n    bench_dvc(\"get\", tmp_dir, dataset.name, \"-o\", \"new\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_get_url.py",
    "content": "def test_get_url(bench_dvc, tmp_dir, scm, dvc, make_dataset):\n    dataset = make_dataset(\n        cache=False, files=True, dvcfile=False, commit=False, remote=False\n    )\n    bench_dvc(\"get-url\", str(dataset), \"new\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_help.py",
    "content": "def test_help(bench_dvc):\n    bench_dvc(\"--help\", rounds=100)\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_import.py",
    "content": "import pytest\n\n\n@pytest.mark.flaky(reruns=3)\n@pytest.mark.requires(\n    \"!=3.53.*,!=3.54.0\",\n    reason=\"Takes 10 mins to run. Regression in 3.53.0, fixed in 3.54.1\",\n)\n# Introduced in https://github.com/treeverse/dvc/pull/10388.\n# Fixed in https://github.com/treeverse/dvc/pull/10531.\ndef test_import(bench_dvc, tmp_dir, scm, dvc, make_dataset, remote):\n    dataset = make_dataset(\n        cache=False, files=False, dvcfile=True, commit=True, remote=True\n    )\n    bench_dvc(\"import\", tmp_dir, dataset.name, \"-o\", \"new\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_import_url.py",
    "content": "def test_import_url(bench_dvc, tmp_dir, scm, dvc, make_dataset):\n    dataset = make_dataset(\n        cache=False, files=True, dvcfile=False, commit=False, remote=False\n    )\n    bench_dvc(\"import-url\", str(dataset), \"new\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_init.py",
    "content": "import shutil\n\n\ndef test_init(bench_dvc, tmp_dir, scm):\n    def _cleanup_dir():\n        for item in tmp_dir.iterdir():\n            if item.is_dir():\n                if item.name != \".git\":\n                    shutil.rmtree(item)\n            else:\n                item.unlink()\n\n    bench_dvc(\"init\", setup=_cleanup_dir, rounds=100, warmup_rounds=1)\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_ls.py",
    "content": "def test_list(bench_dvc, tmp_dir, scm, dvc, make_dataset, remote):\n    make_dataset(cache=False, files=False, dvcfile=True, commit=True, remote=True)\n    bench_dvc(\"list\", tmp_dir)\n    bench_dvc(\"list\", tmp_dir, \"--dvc-only\", name=\"dvc-only\")\n    bench_dvc(\"list\", tmp_dir, \"--recursive\", name=\"recursive\")\n    bench_dvc(\"list\", tmp_dir, \"dataset\", name=\"shallow\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_plots.py",
    "content": "from dvc.repo import Repo\nfrom dvc.testing.benchmarks.fixtures import _pull\n\n\ndef test_plots(project, bench_dvc):\n    with Repo() as dvc:\n        _pull(dvc)\n\n    kwargs = {\"rounds\": 5, \"iterations\": 3, \"warmup_rounds\": 2}\n    bench_dvc(\"plots\", \"show\", name=\"show\", **kwargs)\n    bench_dvc(\"plots\", \"show\", \"--json\", name=\"show-json\", **kwargs)\n    bench_dvc(\"plots\", \"diff\", \"HEAD\", name=\"diff\", **kwargs)\n    bench_dvc(\"plots\", \"diff\", \"HEAD\", \"--json\", name=\"diff-json\", **kwargs)\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_push.py",
    "content": "def test_push(bench_dvc, tmp_dir, dvc, make_dataset, remote):\n    make_dataset(cache=True, dvcfile=True, files=False)\n    bench_dvc(\"push\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_status.py",
    "content": "def test_status(bench_dvc, tmp_dir, dvc, make_dataset):\n    dataset = make_dataset(files=True, dvcfile=True, cache=True)\n    bench_dvc(\"status\")\n    bench_dvc(\"status\", name=\"noop\")\n\n    (dataset / \"new\").write_text(\"new\")\n    bench_dvc(\"status\", name=\"changed\")\n    bench_dvc(\"status\", name=\"changed-noop\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/commands/test_update.py",
    "content": "def test_update(bench_dvc, tmp_dir, dvc, make_dataset):\n    dataset = make_dataset(\n        cache=False, files=True, dvcfile=False, commit=False, remote=False\n    )\n    bench_dvc(\"import-url\", str(dataset), \"new\")\n    (dataset / \"new\").write_text(\"new\")\n    bench_dvc(\"update\", \"new.dvc\")\n    bench_dvc(\"update\", \"new.dvc\", name=\"noop\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/stories/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/testing/benchmarks/cli/stories/test_modify_data.py",
    "content": "\"\"\"\nTests for modifications to an existing dataset.\n\"\"\"\n\nimport glob\nimport os\nimport random\nimport shutil\nimport sys\n\nimport pytest\n\n\n@pytest.mark.skipif(sys.version_info < (3, 10), reason=\"requires 3.10 glob.glob\")\ndef test_partial_add(bench_dvc, tmp_dir, dvc, dataset, remote):\n    random.seed(4231)\n    # Move some files to create a partial dataset\n    os.makedirs(\"partial-copy\")\n    for f in glob.glob(\"*\", root_dir=dataset, recursive=True):  # type: ignore[call-arg]\n        if random.random() > 0.5:  # noqa: S311\n            shutil.move(dataset / f, tmp_dir / \"partial-copy\" / f)\n\n    # Add/push partial dataset\n    bench_dvc(\"add\", dataset)\n    bench_dvc(\"push\")\n\n    # Add more files to the dataset\n    shutil.copytree(\"partial-copy\", dataset, dirs_exist_ok=True)\n\n    # Benchmark operations for adding files to a dataset\n    bench_dvc(\"add\", dataset, name=\"partial\")\n    bench_dvc(\"push\", name=\"partial\")\n    bench_dvc(\"gc\", \"-f\", \"-w\", name=\"noop\")\n    bench_dvc(\"gc\", \"-f\", \"-w\", \"-c\", name=\"cloud-noop\")\n\n\n@pytest.mark.skipif(sys.version_info < (3, 10), reason=\"requires 3.10 glob.glob\")\ndef test_partial_remove(bench_dvc, tmp_dir, dvc, dataset, remote):\n    random.seed(5232)\n    # Add/push full dataset\n    bench_dvc(\"add\", dataset)\n    bench_dvc(\"push\")\n\n    # Remove some files\n    for f in glob.glob(\"*\", root_dir=dataset, recursive=True):  # type: ignore[call-arg]\n        if random.random() > 0.5:  # noqa: S311\n            if os.path.isfile(dataset / f):\n                os.remove(dataset / f)\n            elif os.path.isdir(dataset / f):\n                shutil.rmtree(dataset / f)\n\n    # Benchmark operations for removing files from dataset\n    bench_dvc(\"add\", dataset, name=\"update\")\n    bench_dvc(\"push\", name=\"update\")\n    bench_dvc(\"gc\", \"-f\", \"-w\")\n    bench_dvc(\"gc\", \"-f\", \"-w\", \"-c\", name=\"cloud\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/cli/stories/use_cases/__init__.py",
    "content": ""
  },
  {
    "path": "dvc/testing/benchmarks/cli/stories/use_cases/test_sharing.py",
    "content": "import shutil\n\nfrom packaging import version\n\n\ndef test_sharing(bench_dvc, tmp_dir, dvc, make_dataset, remote, dvc_bin):\n    is_gs = dvc.config[\"remote\"][\"upstream\"][\"url\"].startswith(\"gs://\")\n    if is_gs and version.Version(dvc_bin.version) < version.Version(\"3.59.2\"):\n        # support for allow_anonymous_login in gs was introduced in dvc==3.60\n        # This should not impact the test, just that it will make it slower if used\n        # with fake-gcs-server.\n        with dvc.config.edit() as d:\n            d[\"remote\"][\"upstream\"].pop(\"allow_anonymous_login\", None)\n\n    dataset = make_dataset(cache=True, dvcfile=True)\n\n    bench_dvc(\"push\")\n    bench_dvc(\"push\", name=\"noop\")\n\n    shutil.rmtree(dataset)\n    shutil.rmtree(tmp_dir / \".dvc\" / \"cache\")\n\n    bench_dvc(\"fetch\")\n    bench_dvc(\"fetch\", name=\"noop\")\n"
  },
  {
    "path": "dvc/testing/benchmarks/conftest.py",
    "content": "from .fixtures import *  # noqa: F403\n\npytest_plugins = [\"dvc.testing.plugin\"]\n"
  },
  {
    "path": "dvc/testing/benchmarks/fixtures.py",
    "content": "import inspect\nimport os\nimport shutil\nimport sys\nfrom pathlib import Path\nfrom subprocess import check_call, check_output\nfrom typing import Optional\n\nimport pytest\nfrom dulwich.porcelain import clone\nfrom funcy import first\nfrom packaging import specifiers, version\n\nfrom dvc.types import StrPath\n\n\n@pytest.fixture(scope=\"session\")\ndef bench_config(request):\n    return request.config.bench_config\n\n\nclass VirtualEnv:\n    def __init__(self, path: StrPath) -> None:\n        self.path = Path(path)\n        self.bin = self.path / (\"Scripts\" if os.name == \"nt\" else \"bin\")\n\n    def create(self) -> None:\n        check_call([sys.executable, \"-m\", \"uv\", \"venv\", self.path])  # noqa:S603\n\n    def install(self, *packages: str) -> None:\n        check_call([sys.executable, \"-m\", \"uv\", \"pip\", \"install\", *packages])  # noqa: S603\n\n    def run(self, cmd: str, *args: str, env: Optional[dict[str, str]] = None) -> None:\n        exe = self.which(cmd)\n        check_call([exe, *args], env=env)  # noqa: S603\n\n    def which(self, cmd: str) -> str:\n        assert self.bin.exists()\n        return shutil.which(cmd, path=self.bin) or cmd\n\n\n@pytest.fixture(scope=\"session\")\ndef make_dvc_venv(tmp_path_factory):\n    def _make_dvc_venv(name):\n        name = _sanitize_venv_name(name)\n        venv_dir = tmp_path_factory.mktemp(f\"dvc-venv-{name}\")\n        venv = VirtualEnv(venv_dir)\n        venv.create()\n        return venv\n\n    return _make_dvc_venv\n\n\ndef _sanitize_venv_name(name):\n    return name.replace(\"/\", \"-\").replace(\"\\\\\", \"-\")\n\n\n@pytest.fixture(scope=\"session\")\ndef dvc_venvs():\n    return {}\n\n\n@pytest.fixture(scope=\"session\")\ndef dvc_repo(tmp_path_factory, bench_config):\n    url = bench_config.dvc_repo\n\n    if os.path.isdir(url):\n        return url\n\n    tmp_path = tmp_path_factory.mktemp(\"dvc-git-repo\")\n    clone(url, os.fspath(tmp_path))\n\n    return 
tmp_path\n\n\n@pytest.fixture(scope=\"session\")\ndef dvc_bench_repo(tmp_path_factory, bench_config):\n    url = bench_config.dvc_bench_repo\n    if url is None:\n        pytest.skip(\n            \"--dvc-bench-repo is not set, \"\n            \"clone https://github.com/treeverse/dvc-bench repository and set its path\"\n        )\n\n    if os.path.isdir(url):\n        return Path(url)\n\n    tmp_path = tmp_path_factory.mktemp(\"dvc-bench-git-repo\")\n    clone(url, os.fspath(tmp_path))\n\n    return tmp_path\n\n\n@pytest.fixture(scope=\"session\")\ndef make_dvc_bin(\n    dvc_rev,\n    dvc_venvs,\n    make_dvc_venv,\n    dvc_repo,\n    bench_config,\n    request,\n):\n    if dvc_rev:\n        venv: VirtualEnv = dvc_venvs.get(dvc_rev)\n        if not venv:\n            venv = make_dvc_venv(dvc_rev)\n            if bench_config.dvc_install_deps:\n                pkg = f\"dvc[{bench_config.dvc_install_deps}]\"\n            else:\n                pkg = \"dvc\"\n            packages = [f\"{pkg} @ git+file://{dvc_repo}@{dvc_rev}\"]\n\n            version_constraints = [\n                (\"<3.50.3\", [\"pygit2==1.14.1\"]),\n                (\"<3.44.0\", [\"dulwich<1.0.0\"]),\n                (\"<3.67.0\", [\"pathspec<1\"]),\n            ]\n            for spec, pkgs in version_constraints:\n                try:\n                    _dvc_version = version.Version(dvc_rev)\n                except version.InvalidVersion:\n                    continue\n                if _dvc_version in specifiers.SpecifierSet(spec):\n                    packages.extend(pkgs)\n\n            venv.install(*packages)\n\n            dvc_venvs[dvc_rev] = venv\n        dvc_bin = venv.which(\"dvc\")\n    else:\n        dvc_bin = bench_config.dvc_bin\n\n    def _dvc_bin(*args):\n        check_call([dvc_bin, *args])  # noqa: S603\n\n    _dvc_bin.version = check_output([dvc_bin, \"--version\"], text=True)  # type: ignore[attr-defined]  # noqa: S603\n    return _dvc_bin\n\n\n@pytest.fixture\ndef 
dvc_bin(request, make_dvc_bin):\n    if marker := request.node.get_closest_marker(\"requires\"):\n        from packaging.specifiers import SpecifierSet\n        from packaging.version import Version, parse\n\n        spec = first(marker.args)\n        assert spec is not None\n        spec = SpecifierSet(spec) if isinstance(spec, str) else spec\n        reason = marker.kwargs[\"reason\"]\n        dvc_version = make_dvc_bin.version\n        version = Version(parse(dvc_version).base_version)\n        if version not in spec:\n            pytest.skip(\n                f\"Version {dvc_version} does not satisfy requirement {spec!r}: {reason}\"\n            )\n    return make_dvc_bin\n\n\n@pytest.fixture\ndef make_bench(request):\n    def _make_bench(name):\n        import pytest_benchmark.plugin\n\n        # hack from https://github.com/ionelmc/pytest-benchmark/issues/166\n        fixture_function = pytest_benchmark.plugin.benchmark\n        try:\n            # pytest >= 8.4.0\n            wrapped_func = fixture_function._get_wrapped_function()\n        except AttributeError:\n            wrapped_func = fixture_function.__pytest_wrapped__.obj  # type: ignore[attr-defined]  # ty: ignore[unresolved-attribute]\n        assert inspect.isgeneratorfunction(wrapped_func)\n\n        generator = wrapped_func(request)\n        bench = next(generator)\n        assert isinstance(bench, pytest_benchmark.plugin.BenchmarkFixture)\n        request.addfinalizer(lambda: next(generator, None))\n\n        suffix = f\"-{name}\"\n\n        def add_suffix(_name):\n            start, sep, end = _name.partition(\"[\")\n            return start + suffix + sep + end\n\n        bench.name = add_suffix(bench.name)\n        bench.fullname = add_suffix(bench.fullname)\n\n        return bench\n\n    return _make_bench\n\n\n@pytest.fixture\ndef bench_dvc(request, dvc_bin, make_bench):\n    def _bench_dvc(*args, **kwargs):\n        name = kwargs.pop(\"name\", None)\n        name = f\"-{name}\" if name 
else \"\"\n        bench = make_bench(args[0] + name)\n        if request.config.getoption(\"--dvc-benchmark-cprofile-dump\") or kwargs.pop(\n            \"cprofile\", False\n        ):\n            cprofile_results = request.config.invocation_params.dir / \"prof\"\n            cprofile_results.mkdir(exist_ok=True)\n            stats_file = cprofile_results / f\"{bench.name}.prof\"\n            args = (*args, \"--cprofile-dump\", stats_file)\n\n        return bench.pedantic(dvc_bin, args=args, **kwargs)\n\n    return _bench_dvc\n\n\ndef _pull(repo, *args):\n    from dvc.exceptions import CheckoutError, DownloadError\n\n    while True:\n        try:\n            return repo.pull(*args)\n        except (CheckoutError, DownloadError):\n            pass\n\n\n@pytest.fixture\ndef make_dataset(request, bench_config, tmp_dir, dvc_bench_repo):\n    def _make_dataset(\n        dvcfile=False, files=True, cache=False, commit=False, remote=False\n    ):\n        from dvc.repo import Repo\n\n        path = tmp_dir / \"dataset\"\n        root = dvc_bench_repo\n        src = root / \"data\" / bench_config.dataset / \"dataset\"\n        src_dvc = src.with_suffix(\".dvc\")\n\n        dvc = Repo(root)\n\n        _pull(dvc, [str(src_dvc)])\n        if files:\n            shutil.copytree(src, path)\n        if dvcfile:\n            shutil.copy(src.with_suffix(\".dvc\"), path.with_suffix(\".dvc\"))\n        if cache:\n            shutil.copytree(root / \".dvc\" / \"cache\", tmp_dir / \".dvc\" / \"cache\")\n        if remote:\n            assert dvcfile\n            assert not cache\n            assert tmp_dir.dvc\n            # FIXME temporary hack, we should try to push from home repo\n            # directly to this remote instead\n            shutil.copytree(root / \".dvc\" / \"cache\", tmp_dir / \".dvc\" / \"cache\")\n            tmp_dir.dvc.push([str(path.with_suffix(\".dvc\").relative_to(tmp_dir))])\n            shutil.rmtree(tmp_dir / \".dvc\" / \"cache\")\n        if commit:\n   
         assert dvcfile\n            assert tmp_dir.scm\n            tmp_dir.scm.add([str(path.with_suffix(\".dvc\").relative_to(tmp_dir))])\n            tmp_dir.scm.commit(\"add dataset\")\n        return path\n\n    return _make_dataset\n\n\n@pytest.fixture\ndef dataset(make_dataset):\n    return make_dataset(dvcfile=False, files=True, cache=False)\n\n\n@pytest.fixture\ndef remote_dataset():\n    pytest.skip(\"fixme\")\n\n\n@pytest.fixture\ndef make_project(tmp_path_factory):\n    def _make_project(url, rev=None):\n        path = os.fspath(tmp_path_factory.mktemp(\"dvc-project\"))\n\n        if rev:\n            rev = rev.encode(\"ascii\")\n\n        clone(url, path, branch=rev)\n        return path\n\n    return _make_project\n\n\n@pytest.fixture\ndef project(bench_config, monkeypatch, make_project):\n    rev = bench_config.project_rev\n    url = bench_config.project_repo\n\n    if os.path.isdir(url):\n        path = url\n        assert not rev\n    else:\n        path = make_project(url, rev=rev)\n\n    monkeypatch.chdir(path)\n"
  },
  {
    "path": "dvc/testing/benchmarks/plugin.py",
    "content": "import os\nfrom dataclasses import dataclass, fields\nfrom pathlib import Path\nfrom typing import Optional\n\nDEFAULT_DVC_BIN = \"dvc\"\nDEFAULT_DVC_REPO = os.fspath(Path(__file__).parents[3])\nDEFAULT_PROJECT_REPO = \"https://github.com/iterative/example-get-started\"\n\n\ndef pytest_report_header(config):\n    bconf = config.bench_config\n    return f\"dvc-bench: {bconf}\"\n\n\ndef pytest_generate_tests(metafunc):\n    revs = metafunc.config.getoption(\"--dvc-revs\")\n    if not revs:\n        revs = [None]\n    if \"dvc_rev\" in metafunc.fixturenames:\n        metafunc.parametrize(\"dvc_rev\", revs, scope=\"session\")\n\n\n@dataclass\nclass DVCBenchConfig:\n    dataset: str = \"tiny\"\n    dvc_repo: str = DEFAULT_DVC_REPO\n    dvc_bench_repo: Optional[str] = None\n    project_repo: str = DEFAULT_PROJECT_REPO\n    project_rev: Optional[str] = None\n    dvc_bin: str = DEFAULT_DVC_BIN\n    dvc_revs: Optional[list[str]] = None\n    dvc_install_deps: Optional[str] = None\n\n    def __repr__(self):\n        args = \", \".join(\n            f\"{f.name}={val!r}\"\n            for f in fields(self)\n            if (val := getattr(self, f.name)) != f.default\n        )\n        return f\"{self.__class__.__name__}({args})\"\n\n\ndef pytest_configure(config):\n    config.addinivalue_line(\n        \"markers\",\n        \"requires(spec): mark a test to run only on versions that satisfy the spec\",\n    )\n    config.bench_config = DVCBenchConfig(\n        dataset=config.getoption(\"--dataset\"),\n        dvc_repo=config.getoption(\"--dvc-repo\"),\n        dvc_bench_repo=config.getoption(\"--dvc-bench-repo\"),\n        project_repo=config.getoption(\"--project-repo\"),\n        project_rev=config.getoption(\"--project-rev\"),\n        dvc_bin=config.getoption(\"--dvc-bin\"),\n        dvc_revs=config.getoption(\"--dvc-revs\"),\n        dvc_install_deps=config.getoption(\"--dvc-install-deps\"),\n    )\n\n\ndef resolve_path(path):\n    if os.path.isdir(path):\n  
      return os.path.abspath(path)\n    return path\n\n\ndef pytest_addoption(parser):\n    parser.addoption(\n        \"--dataset\",\n        type=str,\n        default=\"tiny\",\n        help=\"Dataset name to use in tests (e.g. tiny/small/large/mnist/etc)\",\n    )\n\n    parser.addoption(\n        \"--dvc-benchmark-cprofile-dump\",\n        action=\"store_true\",\n        default=False,\n        help=\"Save cprofile results\",\n    )\n\n    parser.addoption(\n        \"--dvc-bin\",\n        type=str,\n        default=DEFAULT_DVC_BIN,\n        help=\"Path to dvc binary\",\n    )\n\n    parser.addoption(\n        \"--dvc-revs\",\n        type=lambda revs: revs.split(\",\"),\n        help=(\"Comma-separated list of DVC revisions to test (overrides `--dvc-bin`)\"),\n    )\n\n    parser.addoption(\n        \"--dvc-repo\",\n        type=resolve_path,\n        default=DEFAULT_DVC_REPO,\n        help=\"Path or url to dvc git repo\",\n    )\n\n    parser.addoption(\n        \"--dvc-install-deps\",\n        type=str,\n        help=\"Comma-separated list of DVC installation packages\",\n    )\n\n    parser.addoption(\n        \"--dvc-bench-repo\",\n        type=resolve_path,\n        default=None,\n        help=\"Path or url to dvc-bench git repo (for loading benchmark datasets)\",\n    )\n\n    parser.addoption(\"--project-rev\", type=str, help=\"Project revision to test\")\n    parser.addoption(\n        \"--project-repo\",\n        type=resolve_path,\n        default=DEFAULT_PROJECT_REPO,\n        help=\"Path or url to dvc project\",\n    )\n"
  },
  {
    "path": "dvc/testing/cloud.py",
    "content": "import locale\nimport pathlib\nfrom abc import ABC, abstractmethod\n\n\nclass Cloud(ABC):\n    IS_OBJECT_STORAGE = False\n\n    @abstractmethod\n    def is_file(self):\n        pass\n\n    @abstractmethod\n    def is_dir(self):\n        pass\n\n    @abstractmethod\n    def exists(self):\n        pass\n\n    @abstractmethod\n    def mkdir(self, mode=0o777, parents=False, exist_ok=False):\n        pass\n\n    def write_text(self, contents, encoding=None, errors=None):\n        if not encoding:\n            encoding = locale.getpreferredencoding(False)\n        assert errors is None\n        self.write_bytes(contents.encode(encoding))\n\n    @abstractmethod\n    def write_bytes(self, contents):\n        raise NotImplementedError\n\n    @abstractmethod\n    def unlink(self, missing_ok: bool = False) -> None:\n        pass\n\n    @abstractmethod\n    def rmdir(self, recursive: bool = True) -> None:\n        pass\n\n    def read_text(self, encoding=None, errors=None):\n        if not encoding:\n            encoding = locale.getpreferredencoding(False)\n        assert errors is None\n        return self.read_bytes().decode(encoding)\n\n    @abstractmethod\n    def read_bytes(self):\n        pass\n\n    def _gen(self, struct, prefix=None):\n        for name, contents in struct.items():\n            path = (prefix or self) / name\n\n            if isinstance(contents, dict):\n                if not contents:\n                    path.mkdir(parents=True, exist_ok=True)\n                else:\n                    self._gen(contents, prefix=path)\n            else:\n                path.parent.mkdir(parents=True, exist_ok=True)\n                if isinstance(contents, bytes):\n                    path.write_bytes(contents)\n                else:\n                    path.write_text(contents, encoding=\"utf-8\")\n\n    def gen(self, struct, text=\"\"):\n        if isinstance(struct, (str, bytes, pathlib.PurePath)):\n            struct = {struct: text}\n\n        
self._gen(struct)\n        return struct.keys()\n\n    def close(self):  # noqa: B027\n        pass\n\n    @staticmethod\n    def should_test():\n        return True\n\n    @staticmethod\n    def get_url():\n        raise NotImplementedError\n\n    @property\n    @abstractmethod\n    def config(self):\n        pass\n"
  },
  {
    "path": "dvc/testing/conftest.py",
    "content": "from .fixtures import *  # noqa: F403\n"
  },
  {
    "path": "dvc/testing/fixtures.py",
    "content": "import os\nimport pathlib\nimport subprocess\n\nimport pytest\n\nfrom .scripts import copy_script\n\n__all__ = [\n    \"cloud\",\n    \"copy_script\",\n    \"docker_compose_project_name\",\n    \"docker_services\",\n    \"dvc\",\n    \"local_cloud\",\n    \"local_remote\",\n    \"local_workspace\",\n    \"make_cloud\",\n    \"make_cloud_version_aware\",\n    \"make_local\",\n    \"make_remote\",\n    \"make_remote_version_aware\",\n    \"make_remote_worktree\",\n    \"make_tmp_dir\",\n    \"make_workspace\",\n    \"remote\",\n    \"remote_version_aware\",\n    \"remote_worktree\",\n    \"run_copy\",\n    \"scm\",\n    \"tmp_dir\",\n    \"workspace\",\n]\n\nCACHE: dict[tuple[bool, bool, bool], str] = {}\n\n\n@pytest.fixture(scope=\"session\")\ndef make_tmp_dir(tmp_path_factory, request, worker_id):\n    def make(name, *, scm=False, dvc=False, subdir=False):\n        from shutil import copytree, ignore_patterns\n\n        from dvc.repo import Repo\n        from dvc.scm import Git\n\n        from .tmp_dir import TmpDir\n\n        cache = CACHE.get((scm, dvc, subdir))\n        if not cache:\n            cache_dir = tmp_path_factory.mktemp(\"dvc-test-cache\" + worker_id)\n            TmpDir(cache_dir).init(scm=scm, dvc=dvc, subdir=subdir)\n            CACHE[(scm, dvc, subdir)] = cache = os.fspath(cache_dir)\n\n        assert cache\n        path = tmp_path_factory.mktemp(name) if isinstance(name, str) else name\n\n        # ignore sqlite files from .dvc/tmp. 
We might not be closing the cache\n        # connection resulting in PermissionErrors in Windows.\n        ignore = ignore_patterns(\"cache.db*\")\n        copytree(cache, path, dirs_exist_ok=True, ignore=ignore)\n        new_dir = TmpDir(path)\n        str_path = os.fspath(new_dir)\n        if dvc:\n            new_dir.dvc = Repo(str_path)\n        if scm:\n            new_dir.scm = new_dir.dvc.scm if hasattr(new_dir, \"dvc\") else Git(str_path)\n        request.addfinalizer(new_dir.close)\n        return new_dir\n\n    return make\n\n\n@pytest.fixture\ndef tmp_dir(tmp_path, make_tmp_dir, request, monkeypatch):\n    monkeypatch.chdir(tmp_path)\n    fixtures = request.fixturenames\n    return make_tmp_dir(tmp_path, scm=\"scm\" in fixtures, dvc=\"dvc\" in fixtures)\n\n\n@pytest.fixture\ndef scm(tmp_dir):\n    return tmp_dir.scm\n\n\n@pytest.fixture\ndef dvc(tmp_dir):\n    with tmp_dir.dvc as _dvc:\n        yield _dvc\n\n\n@pytest.fixture\ndef make_local(make_tmp_dir):\n    def _make_local():\n        return make_tmp_dir(\"local-cloud\")\n\n    return _make_local\n\n\n@pytest.fixture\ndef make_cloud(request):\n    def _make_cloud(typ):\n        return request.getfixturevalue(f\"make_{typ}\")()\n\n    return _make_cloud\n\n\n@pytest.fixture\ndef make_cloud_version_aware(request):\n    def _make_cloud(typ):\n        return request.getfixturevalue(f\"make_{typ}_version_aware\")()\n\n    return _make_cloud\n\n\n@pytest.fixture\ndef cloud(make_cloud, request):\n    typ = getattr(request, \"param\", \"local\")\n    return make_cloud(typ)\n\n\n@pytest.fixture\ndef local_cloud(make_cloud):\n    return make_cloud(\"local\")\n\n\n@pytest.fixture\ndef make_remote(tmp_dir, dvc, make_cloud):  # noqa: ARG001\n    def _make_remote(name, typ=\"local\", **kwargs):\n        cloud = make_cloud(typ)\n        tmp_dir.add_remote(name=name, config=cloud.config, **kwargs)\n        return cloud\n\n    return _make_remote\n\n\n@pytest.fixture\ndef make_remote_version_aware(tmp_dir, dvc, 
make_cloud_version_aware):  # noqa: ARG001\n    def _make_remote(name, typ=\"local\", **kwargs):\n        cloud = make_cloud_version_aware(typ)\n        config = dict(cloud.config)\n        config[\"version_aware\"] = True\n        tmp_dir.add_remote(name=name, config=config, **kwargs)\n        return cloud\n\n    return _make_remote\n\n\n@pytest.fixture\ndef make_remote_worktree(tmp_dir, dvc, make_cloud_version_aware):  # noqa: ARG001\n    def _make_remote(name, typ=\"local\", **kwargs):\n        cloud = make_cloud_version_aware(typ)\n        config = dict(cloud.config)\n        config[\"worktree\"] = True\n        tmp_dir.add_remote(name=name, config=config, **kwargs)\n        return cloud\n\n    return _make_remote\n\n\n@pytest.fixture\ndef remote(make_remote, request):\n    typ = getattr(request, \"param\", \"local\")\n    return make_remote(\"upstream\", typ=typ)\n\n\n@pytest.fixture\ndef remote_version_aware(make_remote_version_aware, request):\n    typ = getattr(request, \"param\", \"local\")\n    return make_remote_version_aware(\"upstream\", typ=typ)\n\n\n@pytest.fixture\ndef remote_worktree(make_remote_worktree, request):\n    typ = getattr(request, \"param\", \"local\")\n    return make_remote_worktree(\"upstream\", typ=typ)\n\n\n@pytest.fixture\ndef local_remote(make_remote):\n    return make_remote(\"upstream\", typ=\"local\")\n\n\n@pytest.fixture\ndef make_workspace(tmp_dir, dvc, make_cloud):\n    def _make_workspace(name, typ=\"local\"):\n        from dvc.cachemgr import CacheManager\n\n        cloud = make_cloud(typ)\n\n        tmp_dir.add_remote(name=name, config=cloud.config, default=False)\n        tmp_dir.add_remote(\n            name=f\"{name}-cache\", url=\"remote://workspace/cache\", default=False\n        )\n\n        scheme = getattr(cloud, \"scheme\", \"local\")\n        if scheme != \"http\":\n            with dvc.config.edit() as conf:\n                conf[\"cache\"][scheme] = f\"{name}-cache\"\n\n            dvc.cache = 
CacheManager(dvc)\n\n        return cloud\n\n    return _make_workspace\n\n\n@pytest.fixture\ndef workspace(make_workspace, request):\n    typ = getattr(request, \"param\", \"local\")\n    return make_workspace(\"workspace\", typ=typ)\n\n\n@pytest.fixture\ndef local_workspace(make_workspace):\n    return make_workspace(\"workspace\", typ=\"local\")\n\n\n@pytest.fixture(scope=\"session\")\ndef docker_compose_project_name():\n    return \"pytest-dvc-test\"\n\n\n@pytest.fixture(scope=\"session\")\ndef docker_services(tmp_path_factory, request):\n    from filelock import FileLock\n\n    if os.environ.get(\"CI\") and os.name == \"nt\":\n        pytest.skip(\"disabled for Windows on CI\")\n\n    try:\n        subprocess.check_output(  # noqa: S602\n            \"docker ps\",  # noqa: S607\n            stderr=subprocess.STDOUT,\n            shell=True,\n        )\n    except subprocess.CalledProcessError as err:\n        out = (err.output or b\"\").decode(\"utf-8\")\n        pytest.skip(f\"docker is not installed or the daemon is not running: {out}\")\n\n    try:\n        cmd = \"docker-compose version\"\n        subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)  # noqa: S602\n    except subprocess.CalledProcessError as err:\n        out = (err.output or b\"\").decode(\"utf-8\")\n        pytest.skip(f\"docker-compose is not installed: {out}\")\n\n    # making sure we don't accidentally launch docker-compose in parallel,\n    # as it might result in network conflicts. 
Inspired by:\n    # https://github.com/pytest-dev/pytest-xdist#making-session-scoped-fixtures-execute-only-once\n    lockfile = tmp_path_factory.getbasetemp().parent / \"docker-compose.lock\"\n    with FileLock(os.fspath(lockfile)):\n        return request.getfixturevalue(\"docker_services\")\n\n\n@pytest.fixture\ndef run_copy(tmp_dir, copy_script, dvc):  # noqa: ARG001\n    def run_copy(src, dst, **run_kwargs):\n        wdir = pathlib.Path(run_kwargs.get(\"wdir\", \".\"))\n        wdir = pathlib.Path(\"../\" * len(wdir.parts))\n        script_path = wdir / \"copy.py\"\n\n        return dvc.run(\n            cmd=f\"python {script_path} {src} {dst}\",\n            outs=[dst],\n            deps=[src, f\"{script_path}\"],\n            **run_kwargs,\n        )\n\n    return run_copy\n"
  },
  {
    "path": "dvc/testing/matchers.py",
    "content": "from collections.abc import Iterable, Mapping\nfrom typing import TYPE_CHECKING, Any, Optional, Union\n\nif TYPE_CHECKING:\n    import builtins\n\n\nclass dict:  # noqa: A001, N801, PLW1641\n    \"\"\"Special class to eq by matching only presented dict keys.\n\n    Implementation notes:\n\n     - can't inherit from dict because that makes D() == M.dict() to not call\n       our __eq__, if D is a subclass of a dict\n\n     - should not call itself dict or use dict in repr because it creates\n       confusing error messages (shadowing python builtins is bad anyway)\n\n    \"\"\"\n\n    def __init__(self, d: Optional[Mapping[Any, Any]] = None, **keys: Any) -> None:\n        self.d: builtins.dict[Any, Any] = {}\n        if d:\n            self.d.update(d)\n        self.d.update(keys)\n\n    def __len__(self) -> int:\n        return len(self.d)\n\n    def __repr__(self) -> str:\n        inner = \", \".join(f\"{k}={v!r}\" for k, v in self.d.items())\n        return f\"{self.__class__.__name__}({inner})\"\n\n    def __eq__(self, other: object) -> bool:\n        assert isinstance(other, Mapping)\n        return all(other.get(name) == v for name, v in self.d.items())\n\n\nclass unordered:  # noqa: N801, PLW1641\n    \"\"\"Compare list contents, but do not care about ordering.\n\n    (E.g. 
sort lists first, then compare.)\n    If you care about ordering, then just compare lists directly.\"\"\"\n\n    def __init__(self, *items: Any) -> None:\n        self.items = items\n\n    def __repr__(self) -> str:\n        inner = \", \".join(map(repr, self.items))\n        return f\"{self.__class__.__name__}({inner})\"\n\n    def __eq__(self, other: object) -> bool:\n        assert isinstance(other, Iterable)\n        return sorted(self.items) == sorted(other)\n\n\nclass attrs:  # noqa: N801, PLW1641\n    def __init__(self, **attribs: Any) -> None:\n        self.attribs = attribs\n\n    def __repr__(self) -> str:\n        inner = \", \".join(f\"{k}={v!r}\" for k, v in self.attribs.items())\n        return f\"{self.__class__.__name__}({inner})\"\n\n    def __eq__(self, other: object) -> bool:\n        # Unfortunately this doesn't work with classes with slots\n        # self.__class__ = other.__class__\n        return all(getattr(other, name) == v for name, v in self.attribs.items())\n\n\nclass instance_of:  # noqa: N801, PLW1641\n    def __init__(self, expected_type: Union[Any, tuple[Any, ...]]) -> None:\n        self.expected_type = expected_type\n\n    def __repr__(self) -> str:\n        if isinstance(self.expected_type, tuple):\n            inner = f\"({', '.join(t.__name__ for t in self.expected_type)})\"\n        else:\n            inner = self.expected_type.__name__\n        return f\"{self.__class__.__name__}({inner})\"\n\n    def __eq__(self, other: object) -> bool:\n        return isinstance(other, self.expected_type)\n\n\nclass any_of:  # noqa: N801, PLW1641\n    def __init__(self, *items: Any) -> None:\n        self.items = sorted(items)\n\n    def __repr__(self) -> str:\n        inner = \", \".join(map(repr, self.items))\n        return f\"any_of({inner})\"\n\n    def __eq__(self, other: object) -> bool:\n        return other in self.items\n\n\n__all__ = [\n    \"any_of\",\n    \"attrs\",\n    \"dict\",\n    \"instance_of\",\n    \"unordered\",\n]\n"
  },
  {
    "path": "dvc/testing/path_info.py",
    "content": "import os\nimport pathlib\nimport posixpath\nimport sys\nfrom typing import Callable, ClassVar\nfrom urllib.parse import urlparse\n\nfrom dvc.utils import relpath\nfrom dvc.utils.objects import cached_property\n\n\nclass _BasePath:\n    def overlaps(self, other):\n        if isinstance(other, (str, bytes)):\n            other = self.__class__(other)  # type: ignore[call-arg]\n        elif self.__class__ != other.__class__:\n            return False\n        return self.isin_or_eq(other) or other.isin(self)\n\n    def isin_or_eq(self, other):\n        return self == other or self.isin(other)  # type: ignore[attr-defined]\n\n\nclass PathInfo(pathlib.PurePath, _BasePath):\n    # Use __slots__ in PathInfo objects following PurePath implementation.\n    # This makes objects smaller and speeds up attribute access.\n    # We don't add any fields so it's empty.\n    __slots__ = ()\n    scheme = \"local\"\n\n    if sys.version_info < (3, 12):\n\n        def __new__(cls, *args):\n            if cls is PathInfo:\n                cls = WindowsPathInfo if os.name == \"nt\" else PosixPathInfo  # noqa: PLW0642\n\n            return cls._from_parts(args)  # type: ignore[attr-defined]\n\n    def as_posix(self):\n        f = self._flavour  # type: ignore[attr-defined]\n        # Unlike original implementation [1] that uses `str()` we actually need\n        # to use `fspath`, because we've overridden `__str__` method to return\n        # relative paths, which will break original `as_posix`.\n        #\n        # [1] https://github.com/python/cpython/blob/v3.7.0/Lib/pathlib.py#L692\n        return self.fspath.replace(f.sep, \"/\")\n\n    def __str__(self):\n        path = self.__fspath__()\n        return relpath(path)\n\n    def __repr__(self):\n        return f\"{type(self).__name__}: '{self}'\"\n\n    # This permits passing it to file utils directly in Python 3.6+\n    def __fspath__(self):\n        return pathlib.PurePath.__str__(self)\n\n    @property\n    def 
fspath(self):\n        return os.fspath(self)\n\n    url = fspath\n\n    path = fspath\n\n    def relpath(self, other):\n        return self.__class__(relpath(self, other))\n\n    def isin(self, other):\n        if isinstance(other, (str, bytes)):\n            other = self.__class__(other)  # type: ignore[arg-type]\n        elif self.__class__ != other.__class__:\n            return False\n        # Use cached casefolded parts to compare paths\n        n = len(other._cparts)\n        return (\n            len(self._cparts) > n  # type: ignore[attr-defined]\n            and self._cparts[:n] == other._cparts  # type: ignore[attr-defined]\n        )\n\n    def relative_to(self, other, *args, **kwargs):\n        # pathlib relative_to raises exception when one path is not a direct\n        # descendant of the other when os.path.relpath would return abspath.\n        # For DVC PathInfo we only need the relpath behavior.\n        # See: https://bugs.python.org/issue40358\n        try:\n            path = super().relative_to(other, *args, **kwargs)\n        except ValueError:\n            path = relpath(self, other)\n        return self.__class__(path)\n\n\nclass WindowsPathInfo(PathInfo, pathlib.PureWindowsPath):\n    pass\n\n\nclass PosixPathInfo(PathInfo, pathlib.PurePosixPath):\n    pass\n\n\nclass _URLPathInfo(PosixPathInfo):\n    def __str__(self):\n        return self.__fspath__()\n\n    __unicode__ = __str__\n\n\nclass _URLPathParents:\n    def __init__(self, src):\n        self.src = src\n        self._parents = self.src._path.parents\n\n    def __len__(self):\n        return len(self._parents)\n\n    def __getitem__(self, idx):\n        return self.src.replace(path=self._parents[idx])\n\n    def __repr__(self):\n        return f\"<{self.src}.parents>\"\n\n\nclass URLInfo(_BasePath):\n    DEFAULT_PORTS: ClassVar[dict[str, int]] = {\n        \"http\": 80,\n        \"https\": 443,\n        \"ssh\": 22,\n        \"hdfs\": 0,\n    }\n\n    def __init__(self, url):\n   
     p = urlparse(url)\n        assert not p.query\n        assert not p.params\n        assert not p.fragment\n        assert p.password is None\n        self._fill_parts(p.scheme, p.hostname, p.username, p.port, p.path)\n\n    @classmethod\n    def from_parts(\n        cls, scheme=None, host=None, user=None, port=None, path=\"\", netloc=None\n    ):\n        assert bool(host) ^ bool(netloc)\n\n        if netloc is not None:\n            return cls(f\"{scheme}://{netloc}{path}\")\n\n        obj = cls.__new__(cls)\n        obj._fill_parts(scheme, host, user, port, path)\n        return obj\n\n    def _fill_parts(self, scheme, host, user, port, path):\n        assert scheme != \"remote\"\n        assert isinstance(path, (str, bytes, _URLPathInfo))\n\n        self.scheme, self.host, self.user = scheme, host, user\n        self.port = int(port) if port else self.DEFAULT_PORTS.get(self.scheme)\n\n        if isinstance(path, _URLPathInfo):\n            self._spath = str(path)\n            self._path = path\n        else:\n            if path and path[0] != \"/\":\n                path = \"/\" + path  # type: ignore[operator]\n            self._spath = path\n\n    @property\n    def _base_parts(self):\n        return (self.scheme, self.host, self.user, self.port)\n\n    @property\n    def parts(self):\n        return self._base_parts + self._path.parts\n\n    def replace(self, path=None):\n        return self.from_parts(*self._base_parts, path=path)  # type: ignore[misc]\n\n    @cached_property\n    def url(self) -> str:\n        return f\"{self.scheme}://{self.netloc}{self._spath}\"\n\n    def __str__(self):\n        return self.url\n\n    def __repr__(self):\n        return f\"{type(self).__name__}: '{self}'\"\n\n    def __eq__(self, other):\n        if isinstance(other, (str, bytes)):\n            other = self.__class__(other)\n        return (\n            self.__class__ == other.__class__\n            and self._base_parts == other._base_parts\n            and 
self._path == other._path\n        )\n\n    def __hash__(self):\n        return hash(self.parts)\n\n    def __div__(self, other):\n        return self.replace(path=posixpath.join(self._spath, other))\n\n    def joinpath(self, *args):\n        return self.replace(path=posixpath.join(self._spath, *args))\n\n    __truediv__ = __div__\n\n    @property\n    def path(self):\n        return self._spath\n\n    @cached_property\n    def _path(self) -> \"_URLPathInfo\":\n        return _URLPathInfo(self._spath)\n\n    @property\n    def name(self) -> str:\n        return self._path.name\n\n    @cached_property\n    def netloc(self) -> str:\n        netloc = self.host\n        if self.user:\n            netloc = self.user + \"@\" + netloc\n        if self.port and int(self.port) != self.DEFAULT_PORTS.get(self.scheme):\n            netloc += \":\" + str(self.port)\n        return netloc\n\n    @property\n    def bucket(self) -> str:\n        return self.netloc\n\n    @property\n    def parent(self):\n        return self.replace(path=self._path.parent)\n\n    @property\n    def parents(self):\n        return _URLPathParents(self)\n\n    def relative_to(self, other):\n        if isinstance(other, (str, bytes)):\n            other = self.__class__(other)\n        if self.__class__ != other.__class__:\n            msg = f\"'{self}' has incompatible class with '{other}'\"\n            raise ValueError(msg)\n        if self._base_parts != other._base_parts:\n            msg = f\"'{self}' does not start with '{other}'\"\n            raise ValueError(msg)\n        return self._path.relative_to(other._path)\n\n    def isin(self, other):\n        if isinstance(other, (str, bytes)):\n            other = self.__class__(other)\n        elif self.__class__ != other.__class__:\n            return False\n        return self._base_parts == other._base_parts and self._path.isin(other._path)\n\n\nclass CloudURLInfo(URLInfo):\n    @property\n    def path(self):\n        return 
self._spath.lstrip(\"/\")\n\n\nclass HTTPURLInfo(URLInfo):\n    __hash__: Callable[[\"HTTPURLInfo\"], int] = URLInfo.__hash__  # type: ignore[assignment]\n\n    def __init__(self, url):\n        p = urlparse(url)\n        stripped = p._replace(params=None, query=None, fragment=None)\n        super().__init__(stripped.geturl())\n        self.params = p.params\n        self.query = p.query\n        self.fragment = p.fragment\n\n    def replace(self, path=None):\n        return self.from_parts(  # type: ignore[misc]\n            *self._base_parts,\n            params=self.params,\n            query=self.query,\n            fragment=self.fragment,\n            path=path,\n        )\n\n    @classmethod\n    def from_parts(\n        cls,\n        scheme=None,\n        host=None,\n        user=None,\n        port=None,\n        path=\"\",\n        netloc=None,\n        params=None,\n        query=None,\n        fragment=None,\n    ):\n        assert bool(host) ^ bool(netloc)\n\n        if netloc is not None:\n            return cls(\n                \"{}://{}{}{}{}{}\".format(\n                    scheme,\n                    netloc,\n                    path,\n                    (\";\" + params) if params else \"\",\n                    (\"?\" + query) if query else \"\",\n                    (\"#\" + fragment) if fragment else \"\",\n                )\n            )\n\n        obj = cls.__new__(cls)\n        obj._fill_parts(scheme, host, user, port, path)\n        obj.params = params\n        obj.query = query\n        obj.fragment = fragment\n        return obj\n\n    @property\n    def _extra_parts(self):\n        return (self.params, self.query, self.fragment)\n\n    @property\n    def parts(self):\n        return self._base_parts + self._path.parts + self._extra_parts\n\n    @cached_property\n    def url(self) -> str:\n        return \"{}://{}{}{}{}{}\".format(\n            self.scheme,\n            self.netloc,\n            self._spath,\n            (\";\" + 
self.params) if self.params else \"\",\n            (\"?\" + self.query) if self.query else \"\",\n            (\"#\" + self.fragment) if self.fragment else \"\",\n        )\n\n    def __eq__(self, other):\n        if isinstance(other, (str, bytes)):\n            other = self.__class__(other)\n        return (\n            self.__class__ == other.__class__\n            and self._base_parts == other._base_parts\n            and self._path == other._path\n            and self._extra_parts == other._extra_parts\n        )\n\n\nclass WebDAVURLInfo(URLInfo):\n    @cached_property\n    def url(self) -> str:\n        return \"{}://{}{}\".format(\n            self.scheme.replace(\"webdav\", \"http\"), self.netloc, self._spath\n        )\n"
  },
  {
    "path": "dvc/testing/plugin.py",
    "content": "from .benchmarks.fixtures import *  # noqa: F403\nfrom .fixtures import *  # noqa: F403\n\n\ndef pytest_generate_tests(metafunc):\n    from .benchmarks.plugin import pytest_generate_tests as bench_generate_tests\n\n    bench_generate_tests(metafunc)\n\n\ndef pytest_addoption(parser):\n    from .benchmarks.plugin import pytest_addoption as bench_addoption\n\n    bench_addoption(parser)\n\n\ndef pytest_configure(config):\n    from .benchmarks.plugin import pytest_configure as bench_configure\n\n    bench_configure(config)\n\n\ndef pytest_report_header(config):\n    from .benchmarks.plugin import pytest_report_header as bench_report_header\n\n    return bench_report_header(config)\n"
  },
  {
    "path": "dvc/testing/remote_tests.py",
    "content": "import os\nimport shutil\n\nimport pytest\n\nfrom dvc.utils.fs import remove\nfrom dvc_data.hashfile.tree import Tree\n\n\ndef _check_status(status, **kwargs):\n    for key in (\"ok\", \"missing\", \"new\", \"deleted\"):\n        expected = kwargs.get(key, set())\n        assert expected == set(getattr(status, key))\n\n\nclass TestRemote:\n    def test(self, tmp_dir, dvc, remote):\n        (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n        out = stage.outs[0]\n        cache = out.cache_path\n        foo_hash = out.hash_info\n        foo_hashes = out.get_used_objs().get(None, set())\n\n        (stage_dir,) = tmp_dir.dvc_gen(\n            {\n                \"data_dir\": {\n                    \"data_sub_dir\": {\"data_sub\": \"data_sub\"},\n                    \"data\": \"data\",\n                    \"empty\": \"\",\n                }\n            }\n        )\n\n        out_dir = stage_dir.outs[0]\n        cache_dir = out_dir.cache_path\n        dir_hash = out_dir.hash_info\n        dir_hashes = {dir_hash} | {oid for _, _, oid in out_dir.obj}\n\n        # Check status\n        status = dvc.cloud.status(foo_hashes)\n        _check_status(status, new={foo_hash})\n\n        status_dir = dvc.cloud.status(dir_hashes)\n        _check_status(status_dir, new=dir_hashes)\n\n        # Move cache and check status\n        # See issue https://github.com/treeverse/dvc/issues/4383 for details\n        backup_dir = dvc.cache.local.path + \".backup\"\n        shutil.move(dvc.cache.local.path, backup_dir)\n        status = dvc.cloud.status(foo_hashes)\n        _check_status(status, missing={foo_hash})\n\n        status_dir = dvc.cloud.status(dir_hashes)\n        _check_status(status_dir, missing=dir_hashes)\n\n        # Restore original cache:\n        remove(dvc.cache.local.path)\n        shutil.move(backup_dir, dvc.cache.local.path)\n\n        # Push and check status\n        dvc.cloud.push(foo_hashes)\n        assert os.path.exists(cache)\n        assert 
os.path.isfile(cache)\n\n        dvc.cloud.push(dir_hashes)\n        assert os.path.isfile(cache_dir)\n\n        status = dvc.cloud.status(foo_hashes)\n        _check_status(status, ok={foo_hash})\n\n        status_dir = dvc.cloud.status(dir_hashes)\n        _check_status(status_dir, ok=dir_hashes)\n\n        # Remove and check status\n        dvc.cache.local.clear()\n\n        status = dvc.cloud.status(foo_hashes)\n        _check_status(status, deleted={foo_hash})\n\n        status_dir = dvc.cloud.status(dir_hashes)\n        _check_status(status_dir, deleted=dir_hashes)\n\n        # Pull and check status\n        dvc.cloud.pull(foo_hashes)\n        assert os.path.exists(cache)\n        assert os.path.isfile(cache)\n        with open(cache, encoding=\"utf-8\") as fd:\n            assert fd.read() == \"foo\"\n\n        dvc.cloud.pull(dir_hashes)\n        assert os.path.isfile(cache_dir)\n\n        status = dvc.cloud.status(foo_hashes)\n        _check_status(status, ok={foo_hash})\n\n        status_dir = dvc.cloud.status(dir_hashes)\n        _check_status(status_dir, ok=dir_hashes)\n\n    def test_stage_cache_push_pull(self, tmp_dir, dvc, remote):\n        if remote.scheme in (\"http\", \"https\"):\n            pytest.skip(\"HTTP remote does not support stage cache\")\n\n        tmp_dir.gen(\"foo\", \"foo\")\n        stage = dvc.stage.add(\n            deps=[\"foo\"], outs=[\"bar\"], name=\"copy-foo-bar\", cmd=\"cp foo bar\"\n        )\n        dvc.reproduce(stage.addressing)\n        assert dvc.push(run_cache=True) == 2\n\n        stage_cache_dir = tmp_dir / dvc.stage_cache.cache_dir\n        expected = list(stage_cache_dir.rglob(\"*\"))\n        shutil.rmtree(stage_cache_dir)\n\n        dvc.pull(run_cache=True)\n        assert list(stage_cache_dir.rglob(\"*\")) == expected\n\n    @pytest.mark.xfail(raises=NotImplementedError, strict=False)\n    def test_pull_00_prefix(self, tmp_dir, dvc, remote, monkeypatch):\n        # Related: 
https://github.com/treeverse/dvc/issues/6089\n\n        fs_type = type(dvc.cloud.get_remote_odb(\"upstream\").fs)\n        monkeypatch.setattr(fs_type, \"_ALWAYS_TRAVERSE\", True, raising=False)\n        monkeypatch.setattr(fs_type, \"LIST_OBJECT_PAGE_SIZE\", 256, raising=False)\n\n        # foo's md5 checksum is 00411460f7c92d2124a67ea0f4cb5f85\n        # bar's md5 checksum is 0000000018e6137ac2caab16074784a6\n        foo_out = tmp_dir.dvc_gen(\"foo\", \"363\")[0].outs[0]\n        bar_out = tmp_dir.dvc_gen(\"bar\", \"jk8ssl\")[0].outs[0]\n        expected_hashes = {foo_out.hash_info, bar_out.hash_info}\n\n        dvc.push()\n        status = dvc.cloud.status(expected_hashes)\n        _check_status(status, ok=expected_hashes)\n\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"foo\")\n        remove(tmp_dir / \"bar\")\n\n        stats = dvc.pull()\n        assert stats == {\n            \"added\": [\"bar\", \"foo\"],\n            \"deleted\": [],\n            \"modified\": [],\n            \"stats\": {\"fetched\": 2, \"added\": 2, \"deleted\": 0, \"modified\": 0},\n        }\n\n    @pytest.mark.xfail(raises=NotImplementedError, strict=False)\n    def test_pull_no_00_prefix(self, tmp_dir, dvc, remote, monkeypatch):\n        # Related: https://github.com/treeverse/dvc/issues/6244\n\n        fs_type = type(dvc.cloud.get_remote_odb(\"upstream\").fs)\n        monkeypatch.setattr(fs_type, \"_ALWAYS_TRAVERSE\", True, raising=False)\n        monkeypatch.setattr(fs_type, \"LIST_OBJECT_PAGE_SIZE\", 256, raising=False)\n\n        # foo's md5 checksum is 14ffd92a6cbf5f2f657067df0d5881a6\n        # bar's md5 checksum is 64020400f00960c0ef04052547b134b3\n        foo_out = tmp_dir.dvc_gen(\"foo\", \"dvc\")[0].outs[0]\n        bar_out = tmp_dir.dvc_gen(\"bar\", \"cml\")[0].outs[0]\n        expected_hashes = {foo_out.hash_info, bar_out.hash_info}\n\n        dvc.push()\n        status = dvc.cloud.status(expected_hashes)\n        _check_status(status, 
ok=expected_hashes)\n\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"foo\")\n        remove(tmp_dir / \"bar\")\n\n        stats = dvc.pull()\n        assert stats == {\n            \"added\": [\"bar\", \"foo\"],\n            \"deleted\": [],\n            \"modified\": [],\n            \"stats\": {\"fetched\": 2, \"added\": 2, \"deleted\": 0, \"modified\": 0},\n        }\n\n\nclass TestRemoteVersionAware:\n    def test_file(self, tmp_dir, dvc, run_copy, remote_version_aware):\n        (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n        run_copy(\"foo\", \"foo_copy\", name=\"copy\")\n\n        assert dvc.push()\n        assert (remote_version_aware / \"foo\").read_text() == \"foo\"\n        assert (remote_version_aware / \"foo_copy\").read_text() == \"foo\"\n        foo_dvc = (tmp_dir / \"foo.dvc\").read_text()\n        assert \"version_id\" in foo_dvc\n        stage = stage.reload()\n        out = stage.outs[0]\n        assert out.meta.version_id\n        dvc_lock = (tmp_dir / \"dvc.lock\").read_text()\n\n        remove(dvc.cache.local.path)\n        remove(tmp_dir / \"foo\")\n        remove(tmp_dir / \"foo_copy\")\n\n        assert dvc.pull()\n        assert (tmp_dir / \"foo\").read_text() == \"foo\"\n        assert (tmp_dir / \"foo_copy\").read_text() == \"foo\"\n        assert (tmp_dir / \"foo.dvc\").read_text() == foo_dvc\n        assert (tmp_dir / \"dvc.lock\").read_text() == dvc_lock\n\n        assert not dvc.push()\n        assert (remote_version_aware / \"foo\").read_text() == \"foo\"\n        assert (remote_version_aware / \"foo_copy\").read_text() == \"foo\"\n        assert (tmp_dir / \"foo.dvc\").read_text() == foo_dvc\n        assert (tmp_dir / \"dvc.lock\").read_text() == dvc_lock\n\n        dvc.reproduce()\n        assert not dvc.push()\n        assert (remote_version_aware / \"foo\").read_text() == \"foo\"\n        assert (remote_version_aware / \"foo_copy\").read_text() == \"foo\"\n        assert (tmp_dir / \"foo.dvc\").read_text() == 
foo_dvc\n        assert (tmp_dir / \"dvc.lock\").read_text() == dvc_lock\n\n    def test_dir(self, tmp_dir, dvc, run_copy, remote_version_aware):  # noqa: PLR0915\n        (stage,) = tmp_dir.dvc_gen(\n            {\n                \"data_dir\": {\n                    \"data_sub_dir\": {\"data_sub\": \"data_sub\"},\n                    \"data\": \"data\",\n                    \"empty\": \"\",\n                }\n            }\n        )\n\n        assert not dvc.fetch()\n        assert dvc.push()\n\n        data_dir_dvc = (tmp_dir / \"data_dir.dvc\").read_text()\n        assert \"files\" in data_dir_dvc\n        assert \"version_id\" in data_dir_dvc\n        stage = stage.reload()\n        out = stage.outs[0]\n        assert out.files\n        for file in out.files:\n            assert file[\"version_id\"]\n            assert file[\"remote\"] == \"upstream\"\n\n        remove(dvc.cache.local.path)\n        remove(tmp_dir / \"data_dir\")\n\n        assert dvc.pull()\n        assert (tmp_dir / \"data_dir\" / \"data\").read_text() == \"data\"\n        assert (\n            tmp_dir / \"data_dir\" / \"data_sub_dir\" / \"data_sub\"\n        ).read_text() == \"data_sub\"\n        assert (tmp_dir / \"data_dir.dvc\").read_text() == data_dir_dvc\n\n        run_copy(\"data_dir\", \"data_dir_copy\", name=\"copy\")\n        dvc_lock = (tmp_dir / \"dvc.lock\").read_text()\n\n        assert dvc.push()\n        assert (remote_version_aware / \"data_dir\").exists()\n        assert (remote_version_aware / \"data_dir\" / \"data\").exists()\n        assert (remote_version_aware / \"data_dir_copy\").exists()\n        assert (remote_version_aware / \"data_dir_copy\" / \"data\").exists()\n        assert (tmp_dir / \"data_dir.dvc\").read_text() == data_dir_dvc\n        assert (tmp_dir / \"dvc.lock\").read_text() != dvc_lock\n        dvc_lock = (tmp_dir / \"dvc.lock\").read_text()\n\n        assert not dvc.push()\n        assert (remote_version_aware / \"data_dir\").exists()\n        
assert (remote_version_aware / \"data_dir\" / \"data\").exists()\n        assert (remote_version_aware / \"data_dir_copy\").exists()\n        assert (remote_version_aware / \"data_dir_copy\" / \"data\").exists()\n        assert (tmp_dir / \"data_dir.dvc\").read_text() == data_dir_dvc\n        assert (tmp_dir / \"dvc.lock\").read_text() == dvc_lock\n\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"data_dir\")\n        remove(tmp_dir / \"data_dir_copy\")\n        assert not dvc.push()\n        assert (remote_version_aware / \"data_dir\").exists()\n        assert (remote_version_aware / \"data_dir\" / \"data\").exists()\n        assert (remote_version_aware / \"data_dir_copy\").exists()\n        assert (remote_version_aware / \"data_dir_copy\" / \"data\").exists()\n        assert (tmp_dir / \"data_dir.dvc\").read_text() == data_dir_dvc\n        assert (tmp_dir / \"dvc.lock\").read_text() == dvc_lock\n\n        (remote_version_aware / \"data_dir\").rmdir()\n        (remote_version_aware / \"data_dir_copy\").rmdir()\n        assert not (remote_version_aware / \"data_dir\").exists()\n        assert not (remote_version_aware / \"data_dir_copy\").exists()\n        assert dvc.pull()\n        assert (tmp_dir / \"data_dir\" / \"data\").read_text() == \"data\"\n        assert (\n            tmp_dir / \"data_dir\" / \"data_sub_dir\" / \"data_sub\"\n        ).read_text() == \"data_sub\"\n        assert (tmp_dir / \"data_dir_copy\" / \"data\").read_text() == \"data\"\n        assert (\n            tmp_dir / \"data_dir_copy\" / \"data_sub_dir\" / \"data_sub\"\n        ).read_text() == \"data_sub\"\n\n\nclass TestRemoteWorktree:\n    def test_file(self, tmp_dir, dvc, remote_worktree):\n        (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n        dvc.push()\n        assert \"version_id\" in (tmp_dir / \"foo.dvc\").read_text()\n        stage = stage.reload()\n        out = stage.outs[0]\n        assert out.meta.version_id\n\n        remove(dvc.cache.local.path)\n      
  remove(tmp_dir / \"foo\")\n\n        dvc.pull()\n        assert (tmp_dir / \"foo\").read_text() == \"foo\"\n\n    def test_dir(self, tmp_dir, dvc, remote_worktree):\n        (stage,) = tmp_dir.dvc_gen(\n            {\n                \"data_dir\": {\n                    \"data_sub_dir\": {\"data_sub\": \"data_sub\"},\n                    \"data\": \"data\",\n                    \"empty\": \"\",\n                }\n            }\n        )\n\n        dvc.push()\n        assert \"files\" in (tmp_dir / \"data_dir.dvc\").read_text()\n        assert \"version_id\" in (tmp_dir / \"data_dir.dvc\").read_text()\n        stage = stage.reload()\n        out = stage.outs[0]\n        assert out.files\n        for file in out.files:\n            assert file[\"version_id\"]\n            assert file[\"remote\"] == \"upstream\"\n\n        remove(dvc.cache.local.path)\n        remove(tmp_dir / \"data_dir\")\n\n        dvc.pull()\n        assert (tmp_dir / \"data_dir\" / \"data\").read_text() == \"data\"\n        assert (\n            tmp_dir / \"data_dir\" / \"data_sub_dir\" / \"data_sub\"\n        ).read_text() == \"data_sub\"\n\n    def test_deletion(self, tmp_dir, dvc, scm, remote_worktree):\n        tmp_dir.dvc_gen(\n            {\n                \"data_dir\": {\n                    \"data_sub_dir\": {\"data_sub\": \"data_sub\"},\n                    \"data\": \"data\",\n                    \"empty\": \"\",\n                }\n            }\n        )\n        dvc.push()\n        assert (remote_worktree / \"data_dir\" / \"data\").exists()\n        tmp_dir.scm_add([tmp_dir / \"data_dir.dvc\"], commit=\"v1\")\n        v1 = scm.get_rev()\n        remove(tmp_dir / \"data_dir\" / \"data\")\n        dvc.add(str(tmp_dir / \"data_dir\"))\n\n        # data_dir/data should show as deleted in the remote\n        dvc.push()\n        tmp_dir.scm_add([tmp_dir / \"data_dir.dvc\"], commit=\"v2\")\n        assert not (remote_worktree / \"data_dir\" / \"data\").exists()\n\n        
remove(dvc.cache.local.path)\n        remove(tmp_dir / \"data_dir\")\n        # pulling the original pushed version should still succeed\n        scm.checkout(v1)\n        dvc.pull()\n        assert (tmp_dir / \"data_dir\" / \"data\").read_text() == \"data\"\n\n    def test_update(self, tmp_dir, dvc, remote_worktree):\n        (foo_stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n        (data_dir_stage,) = tmp_dir.dvc_gen(\n            {\n                \"data_dir\": {\n                    \"data_sub_dir\": {\"data_sub\": \"data_sub\"},\n                    \"data\": \"data\",\n                    \"empty\": \"\",\n                }\n            }\n        )\n        dvc.push()\n        orig_foo = foo_stage.reload().outs[0]\n        orig_data_dir = data_dir_stage.reload().outs[0]\n        (remote_worktree / \"foo\").write_text(\"bar\")\n        (remote_worktree / \"data_dir\" / \"data\").write_text(\"modified\")\n        (remote_worktree / \"data_dir\" / \"new_data\").write_text(\"new data\")\n\n        dvc.update([str(tmp_dir / \"foo.dvc\"), str(tmp_dir / \"data_dir.dvc\")])\n        updated_foo = foo_stage.reload().outs[0]\n        updated_data_dir = data_dir_stage.reload().outs[0]\n\n        assert updated_foo.meta.version_id\n        assert updated_foo.meta.version_id != orig_foo.meta.version_id\n        updated_data_dir = data_dir_stage.reload().outs[0]\n        orig_tree = orig_data_dir.get_obj()\n        updated_tree = Tree.from_list(updated_data_dir.files, hash_name=\"md5\")\n        assert orig_tree.get((\"data_sub_dir\", \"data_sub\")) == updated_tree.get(\n            (\"data_sub_dir\", \"data_sub\")\n        )\n        orig_meta, _ = orig_tree.get((\"data\",))\n        updated_meta, _ = updated_tree.get((\"data\",))\n        assert orig_meta.version_id\n        assert updated_meta.version_id\n        assert orig_meta.version_id != updated_meta.version_id\n        meta, hash_info = updated_tree.get((\"new_data\",))\n        assert meta\n        assert 
hash_info\n\n        assert (tmp_dir / \"foo\").read_text() == \"bar\"\n        assert (tmp_dir / \"data_dir\" / \"data\").read_text() == \"modified\"\n        assert (tmp_dir / \"data_dir\" / \"new_data\").read_text() == \"new data\"\n\n        remove(dvc.cache.local.path)\n        remove(tmp_dir / \"foo\")\n        remove(tmp_dir / \"data_dir\")\n        dvc.pull()\n        assert (tmp_dir / \"foo\").read_text() == \"bar\"\n        assert (tmp_dir / \"data_dir\" / \"data\").read_text() == \"modified\"\n        assert (tmp_dir / \"data_dir\" / \"new_data\").read_text() == \"new data\"\n"
  },
  {
    "path": "dvc/testing/scripts.py",
    "content": "import pytest\n\nCOPY_SCRIPT = \"\"\"\nimport os\nimport shutil\nimport sys\n\nif os.path.isfile(sys.argv[1]):\n    shutil.copyfile(sys.argv[1], sys.argv[2])\nelse:\n    shutil.copytree(sys.argv[1], sys.argv[2])\n\"\"\".replace(\"\\r\\n\", \"\\n\")\n\n\ndef _add_script(tmp_dir, path, contents=\"\"):\n    script, *_ = tmp_dir.gen(path, contents.encode(\"utf-8\"))\n    if hasattr(tmp_dir, \"scm\"):\n        tmp_dir.scm_add(path, commit=f\"add {path}\")\n    return script.fs_path\n\n\n@pytest.fixture\ndef copy_script(tmp_dir):\n    return _add_script(tmp_dir, \"copy.py\", COPY_SCRIPT)\n"
  },
  {
    "path": "dvc/testing/tmp_dir.py",
    "content": "\"\"\"\nThe goal of this module is making dvc functional tests setup a breeze. This\nincludes a temporary dir, initializing git and DVC repos and bootstrapping some\nfile structure.\n\nThe cornerstone of these fixtures is `tmp_dir`, which creates a temporary dir\nand changes path to it, it might be combined with `scm` and `dvc` to initialize\nempty git and DVC repos. `tmp_dir` returns a Path instance, which should save\nyou from using `open()`, `os` and `os.path` utils many times:\n\n    (tmp_dir / \"some_file\").write_text(\"some text\")\n    # ...\n    assert \"some text\" == (tmp_dir / \"some_file\").read_text()\n    assert (tmp_dir / \"some_file\").exists()\n\nAdditionally it provides `.gen()`, `.scm_gen()` and `.dvc_gen()` methods to\nbootstrap a required file structure in a single call:\n\n    # Generate a dir with files\n    tmp_dir.gen({\"dir\": {\"file\": \"file text\", \"second_file\": \"...\"}})\n\n    # Generate a single file, dirs will be created along the way\n    tmp_dir.gen(\"dir/file\", \"file text\")\n\n    # Generate + git add\n    tmp_dir.scm_gen({\"file1\": \"...\", ...})\n\n    # Generate + git add + git commit\n    tmp_dir.scm_gen({\"file1\": \"...\", ...}, commit=\"add files\")\n\n    # Generate + dvc add\n    tmp_dir.dvc_gen({\"file1\": \"...\", ...})\n\n    # Generate + dvc add + git commit -am \"...\"\n    # This commits stages to git not the generated files.\n    tmp_dir.dvc_gen({\"file1\": \"...\", ...}, commit=\"add files\")\n\nMaking it easier to bootstrap things has a supergoal of incentivizing a move\nfrom global repo template to creating everything inplace, which:\n\n    - makes all path references local to test, enhancing readability\n    - allows using telling filenames, e.g. 
\"git_tracked_file\" instead of \"foo\"\n    - does not create unnecessary files\n\"\"\"\n\nimport os\nimport pathlib\nimport sys\nfrom contextlib import contextmanager\nfrom functools import partialmethod\n\nfrom dvc.utils import serialize\n\n\nclass TmpDir(pathlib.Path):\n    scheme = \"local\"\n\n    @property\n    def fs_path(self):\n        return os.fspath(self)\n\n    @property\n    def url(self):\n        return self.fs_path\n\n    @property\n    def config(self):\n        return {\"url\": self.url}\n\n    if sys.version_info < (3, 12):\n\n        def __new__(cls, *args, **kwargs):\n            if cls is TmpDir:\n                cls = WindowsTmpDir if os.name == \"nt\" else PosixTmpDir  # noqa: PLW0642\n\n            # init parameter and `_init` method has been removed in Python 3.10.\n            kw = {\"init\": False} if sys.version_info < (3, 10) else {}\n            self = cls._from_parts(args, **kw)  # type: ignore[attr-defined]\n            if not self._flavour.is_supported:\n                raise NotImplementedError(\n                    f\"cannot instantiate {cls.__name__!r} on your system\"\n                )\n            if sys.version_info < (3, 10):\n                self._init()\n            return self\n\n    def init(self, *, scm=False, dvc=False, subdir=False):\n        from dvc.repo import Repo\n        from dvc.scm import Git\n\n        assert not scm or not hasattr(self, \"scm\")\n        assert not dvc or not hasattr(self, \"dvc\")\n\n        if scm:\n            Git.init(self.fs_path).close()\n        if dvc:\n            self.dvc = Repo.init(\n                self.fs_path,\n                no_scm=not scm and not hasattr(self, \"scm\"),\n                subdir=subdir,\n            )\n        if scm:\n            self.scm = self.dvc.scm if hasattr(self, \"dvc\") else Git(self.fs_path)\n        if dvc and hasattr(self, \"scm\"):\n            self.scm.commit(\"init dvc\")\n\n    def close(self):\n        if hasattr(self, \"scm\"):\n         
   self.scm.close()\n        if hasattr(self, \"dvc\"):\n            self.dvc.close()\n\n    def _require(self, name):\n        if not hasattr(self, name):\n            raise TypeError(\n                f\"Can't use {name} for this temporary dir. \"\n                f'Did you forget to use \"{name}\" fixture?'\n            )\n\n    # Bootstrapping methods\n    def gen(self, struct, text=\"\"):\n        if isinstance(struct, (str, bytes, pathlib.PurePath)):\n            struct = {struct: text}\n\n        return self._gen(struct)\n\n    def _gen(self, struct, prefix=None):\n        paths = []\n        for name, contents in struct.items():\n            path = (prefix or self) / name\n\n            if isinstance(contents, dict):\n                if not contents:\n                    os.makedirs(path, exist_ok=True)\n                else:\n                    self._gen(contents, prefix=path)\n            else:\n                os.makedirs(path.parent, exist_ok=True)\n                if isinstance(contents, bytes):\n                    path.write_bytes(contents)\n                else:\n                    path.write_text(contents, encoding=\"utf-8\")\n            paths.append(path)\n        return paths\n\n    def dvc_gen(self, struct, text=\"\", commit=None):\n        paths = self.gen(struct, text)\n        return self.dvc_add(paths, commit=commit)\n\n    def scm_gen(self, struct, text=\"\", commit=None, force=False):\n        paths = self.gen(struct, text)\n        return self.scm_add(paths, commit=commit, force=force)\n\n    def commit(self, output_paths, msg, force=False):\n        def to_gitignore(stage_path):\n            from dvc.scm import Git\n\n            return os.path.join(os.path.dirname(stage_path), Git.GITIGNORE)\n\n        gitignores = [\n            to_gitignore(s) for s in output_paths if os.path.exists(to_gitignore(s))\n        ]\n        return self.scm_add(output_paths + gitignores, commit=msg, force=force)\n\n    def dvc_add(self, filenames, 
commit=None):\n        self._require(\"dvc\")\n        filenames = _coerce_filenames(filenames)\n\n        stages = self.dvc.add(filenames)\n        if commit:\n            self.commit([s.path for s in stages], msg=commit)\n        return stages\n\n    def scm_add(self, filenames, commit=None, force=False):\n        from dvc.scm import Git\n\n        self._require(\"scm\")\n        filenames = _coerce_filenames(filenames)\n        assert isinstance(self.scm, Git)\n        self.scm.add(filenames, force=force)\n        if commit:\n            self.scm.commit(commit)\n\n    def add_remote(self, *, url=None, config=None, name=\"upstream\", default=True):\n        self._require(\"dvc\")\n\n        assert bool(url) ^ bool(config)\n\n        if url:\n            config = {\"url\": url}\n\n        with self.dvc.config.edit() as conf:\n            conf[\"remote\"][name] = config\n            if default:\n                conf[\"core\"][\"remote\"] = name\n\n        if hasattr(self, \"scm\"):\n            self.scm.add(self.dvc.config.files[\"repo\"])\n            self.scm.commit(f\"add '{name}' remote\")\n\n        return url or config[\"url\"]\n\n    # contexts\n    @contextmanager\n    def chdir(self):\n        old = os.getcwd()\n        try:\n            os.chdir(self)\n            yield\n        finally:\n            os.chdir(old)\n\n    @contextmanager\n    def branch(self, name, new=False):\n        self._require(\"scm\")\n        old = self.scm.active_branch()\n        try:\n            self.scm.checkout(name, create_new=new)\n            yield\n        finally:\n            self.scm.checkout(old)\n\n    def read_text(self, *args, **kwargs):\n        # NOTE: on windows we'll get PermissionError instead of\n        # IsADirectoryError when we try to `open` a directory, so we can't\n        # rely on exception flow control\n        if self.is_dir():\n            return {\n                path.name: path.read_text(*args, **kwargs) for path in self.iterdir()\n            
}\n        kwargs.setdefault(\"encoding\", \"utf-8\")  # type: ignore[call-overload]\n        return super().read_text(*args, **kwargs)\n\n    def oid_to_path(self, hash_):\n        return str(self / hash_[0:2] / hash_[2:])\n\n    def dump(self, *args, **kwargs):\n        return serialize.DUMPERS[self.suffix](self, *args, **kwargs)\n\n    def parse(self, *args, **kwargs):\n        return serialize.LOADERS[self.suffix](self, *args, **kwargs)\n\n    def modify(self, *args, **kwargs):\n        return serialize.MODIFIERS[self.suffix](self, *args, **kwargs)\n\n    load_yaml = partialmethod(serialize.load_yaml)\n    dump_yaml = partialmethod(serialize.dump_yaml)\n    load_json = partialmethod(serialize.load_json)\n    dump_json = partialmethod(serialize.dump_json)\n    load_toml = partialmethod(serialize.load_toml)\n    dump_toml = partialmethod(serialize.dump_toml)\n\n\ndef make_subrepo(dir_: TmpDir, scm, config=None):\n    dir_.mkdir(parents=True, exist_ok=True)\n    with dir_.chdir():\n        dir_.scm = scm\n        dir_.init(dvc=True, subdir=True)\n        if config:\n            dir_.add_remote(config=config)\n\n\ndef _coerce_filenames(filenames):\n    if isinstance(filenames, (str, bytes, pathlib.PurePath)):\n        filenames = [filenames]\n    return list(map(os.fspath, filenames))\n\n\nclass WindowsTmpDir(TmpDir, pathlib.PureWindowsPath):\n    pass\n\n\nclass PosixTmpDir(TmpDir, pathlib.PurePosixPath):\n    pass\n"
  },
  {
    "path": "dvc/testing/workspace_tests.py",
    "content": "import os\nfrom typing import Union\n\nimport pytest\nfrom funcy import first\n\nfrom dvc.exceptions import URLMissingError\nfrom dvc.repo import Repo\nfrom dvc.repo.ls_url import ls_url, parse_external_url\nfrom dvc.utils.fs import remove\n\n\nclass TestImport:\n    def test_import(self, tmp_dir, dvc, workspace):\n        workspace.gen(\"file\", \"file\")\n        assert not (tmp_dir / \"file\").exists()  # sanity check\n        dvc.imp_url(\"remote://workspace/file\")\n        assert (tmp_dir / \"file\").read_text() == \"file\"\n        assert dvc.status() == {}\n\n    @pytest.fixture\n    def stage_md5(self):\n        pytest.skip()\n\n    @pytest.fixture\n    def dir_md5(self):\n        pytest.skip()\n\n    def test_import_dir(self, tmp_dir, dvc, workspace, stage_md5, dir_md5):\n        from dvc.cachemgr import CacheManager\n\n        workspace.gen({\"dir\": {\"file\": \"file\", \"subdir\": {\"subfile\": \"subfile\"}}})\n\n        # remove external cache to make sure that we don't need it\n        # to import dirs\n        with dvc.config.edit() as conf:\n            del conf[\"cache\"]\n        dvc.cache = CacheManager(dvc)\n\n        assert not (tmp_dir / \"dir\").exists()  # sanity check\n        dvc.imp_url(\"remote://workspace/dir\")\n        assert set(os.listdir(tmp_dir / \"dir\")) == {\"file\", \"subdir\"}\n        assert (tmp_dir / \"dir\" / \"file\").read_text() == \"file\"\n        assert list(os.listdir(tmp_dir / \"dir\" / \"subdir\")) == [\"subfile\"]\n        assert (tmp_dir / \"dir\" / \"subdir\" / \"subfile\").read_text() == \"subfile\"\n\n        assert dvc.status() == {}\n\n        if stage_md5 is not None and dir_md5 is not None:\n            assert (tmp_dir / \"dir.dvc\").read_text() == (\n                f\"md5: {stage_md5}\\n\"\n                \"frozen: true\\n\"\n                \"deps:\\n\"\n                f\"- md5: {dir_md5}\\n\"\n                \"  size: 11\\n\"\n                \"  nfiles: 2\\n\"\n                \"  
hash: md5\\n\"\n                \"  path: remote://workspace/dir\\n\"\n                \"outs:\\n\"\n                \"- md5: b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\\n\"\n                \"  size: 11\\n\"\n                \"  nfiles: 2\\n\"\n                \"  hash: md5\\n\"\n                \"  path: dir\\n\"\n            )\n\n    @pytest.fixture\n    def is_object_storage(self):\n        pytest.skip()\n\n    def test_import_empty_dir(self, tmp_dir, dvc, workspace, is_object_storage):\n        # prefix based storage services (e.g s3) doesn't have the real concept\n        # of directories. So instead we create an empty file that ends with a\n        # trailing slash in order to actually support this operation\n        if is_object_storage:\n            contents: Union[str, dict[str, str]] = \"\"\n        else:\n            contents = {}\n\n        workspace.gen({\"empty_dir/\": contents})\n\n        dvc.imp_url(\"remote://workspace/empty_dir/\")\n\n        empty_dir = tmp_dir / \"empty_dir\"\n        assert empty_dir.is_dir()\n        assert tuple(empty_dir.iterdir()) == ()\n\n\nclass TestImportURLVersionAware:\n    def test_import_file(self, tmp_dir, dvc, remote_version_aware):\n        remote_version_aware.gen(\"file\", \"file\")\n        dvc.imp_url(\"remote://upstream/file\", version_aware=True)\n        stage = first(dvc.index.stages)\n        assert not stage.outs[0].can_push\n        assert (tmp_dir / \"file\").read_text() == \"file\"\n        assert dvc.status() == {}\n\n        orig_version_id = stage.deps[0].meta.version_id\n        orig_def_path = stage.deps[0].def_path\n\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"file\")\n        dvc.pull()\n        assert (tmp_dir / \"file\").read_text() == \"file\"\n\n        (remote_version_aware / \"file\").write_text(\"modified\")\n        assert dvc.status().get(\"file.dvc\") == [\n            {\"changed deps\": {\"remote://upstream/file\": \"update available\"}},\n            {\"changed outs\": 
{\"file\": \"not in cache\"}},\n        ]\n        dvc.update(str(tmp_dir / \"file.dvc\"))\n        assert (tmp_dir / \"file\").read_text() == \"modified\"\n        assert dvc.status() == {}\n\n        stage = first(dvc.index.stages)\n        assert orig_version_id != stage.deps[0].meta.version_id\n        assert orig_def_path == stage.deps[0].def_path\n\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"file\")\n        dvc.pull()\n        assert (tmp_dir / \"file\").read_text() == \"modified\"\n\n    def test_import_dir(self, tmp_dir, dvc, remote_version_aware):\n        remote_version_aware.gen({\"data_dir\": {\"subdir\": {\"file\": \"file\"}}})\n        dvc.imp_url(\"remote://upstream/data_dir\", version_aware=True)\n        stage = first(dvc.index.stages)\n        assert not stage.outs[0].can_push\n        assert (tmp_dir / \"data_dir\" / \"subdir\" / \"file\").read_text() == \"file\"\n        assert dvc.status() == {}\n\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"data_dir\")\n        dvc.pull()\n        assert (tmp_dir / \"data_dir\" / \"subdir\" / \"file\").read_text() == \"file\"\n\n        (remote_version_aware / \"data_dir\" / \"subdir\" / \"file\").write_text(\"modified\")\n        (remote_version_aware / \"data_dir\" / \"new_file\").write_text(\"new\")\n        assert dvc.status().get(\"data_dir.dvc\") == [\n            {\"changed deps\": {\"remote://upstream/data_dir\": \"modified\"}},\n            {\"changed outs\": {\"data_dir\": \"not in cache\"}},\n        ]\n        dvc.update(str(tmp_dir / \"data_dir.dvc\"))\n        assert (tmp_dir / \"data_dir\" / \"subdir\" / \"file\").read_text() == \"modified\"\n        assert (tmp_dir / \"data_dir\" / \"new_file\").read_text() == \"new\"\n        assert dvc.status() == {}\n\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"data_dir\")\n        dvc.pull()\n        assert (tmp_dir / \"data_dir\" / \"subdir\" / \"file\").read_text() == \"modified\"\n        assert (tmp_dir 
/ \"data_dir\" / \"new_file\").read_text() == \"new\"\n\n    def test_import_no_download(self, tmp_dir, dvc, remote_version_aware, scm):\n        remote_version_aware.gen({\"data_dir\": {\"subdir\": {\"file\": \"file\"}}})\n        dvc.imp_url(\"remote://upstream/data_dir\", version_aware=True, no_download=True)\n        scm.add([\"data_dir.dvc\", \".gitignore\"])\n        scm.commit(\"v1\")\n        scm.tag(\"v1\")\n\n        stage = first(dvc.index.stages)\n        assert not stage.outs[0].can_push\n\n        (remote_version_aware / \"data_dir\" / \"foo\").write_text(\"foo\")\n        dvc.update(no_download=True)\n        assert dvc.pull() == {\n            \"modified\": [],\n            \"added\": [\"data_dir\" + os.sep],\n            \"deleted\": [],\n            \"stats\": {\"fetched\": 2, \"modified\": 0, \"added\": 2, \"deleted\": 0},\n        }\n        assert (tmp_dir / \"data_dir\").read_text() == {\n            \"foo\": \"foo\",\n            \"subdir\": {\"file\": \"file\"},\n        }\n        scm.add([\"data_dir.dvc\", \".gitignore\"])\n        scm.commit(\"update\")\n\n        scm.checkout(\"v1\")\n        dvc.cache.local.clear()\n        remove(tmp_dir / \"data_dir\")\n        assert dvc.pull() == {\n            \"modified\": [],\n            \"added\": [\"data_dir\" + os.sep],\n            \"deleted\": [],\n            \"stats\": {\"fetched\": 1, \"modified\": 0, \"added\": 1, \"deleted\": 0},\n        }\n        assert (tmp_dir / \"data_dir\").read_text() == {\"subdir\": {\"file\": \"file\"}}\n\n        dvc.commit(force=True)\n        assert dvc.status() == {}\n\n\ndef match_files(fs, entries, expected):\n    entries_content = {(fs.normpath(d[\"path\"]), d[\"isdir\"]) for d in entries}\n    expected_content = {(fs.normpath(d[\"path\"]), d[\"isdir\"]) for d in expected}\n    assert entries_content == expected_content\n\n\nclass TestLsUrl:\n    @pytest.mark.parametrize(\"fname\", [\"foo\", \"foo.dvc\", \"dir/foo\"])\n    def test_file(self, cloud, 
fname):\n        cloud.gen({fname: \"foo contents\"})\n        fs, fs_path = parse_external_url(cloud.url, cloud.config)\n        result = ls_url(str(cloud / fname), fs_config=cloud.config)\n        match_files(fs, result, [{\"path\": fs.join(fs_path, fname), \"isdir\": False}])\n\n    def test_dir(self, cloud):\n        cloud.gen({\"dir/foo\": \"foo contents\", \"dir/subdir/bar\": \"bar contents\"})\n        if not (cloud / \"dir\").is_dir():\n            pytest.skip(\"Cannot create directories on this cloud\")\n        fs, _ = parse_external_url(cloud.url, cloud.config)\n        result = ls_url(str(cloud / \"dir\"), fs_config=cloud.config)\n        match_files(\n            fs,\n            result,\n            [\n                {\"path\": \"foo\", \"isdir\": False},\n                {\"path\": \"subdir\", \"isdir\": True},\n            ],\n        )\n\n    def test_recursive(self, cloud):\n        cloud.gen({\"dir/foo\": \"foo contents\", \"dir/subdir/bar\": \"bar contents\"})\n        if not (cloud / \"dir\").is_dir():\n            pytest.skip(\"Cannot create directories on this cloud\")\n        fs, _ = parse_external_url(cloud.url, cloud.config)\n        result = ls_url(str(cloud / \"dir\"), fs_config=cloud.config, recursive=True)\n        match_files(\n            fs,\n            result,\n            [\n                {\"path\": \"foo\", \"isdir\": False},\n                {\"path\": \"subdir/bar\", \"isdir\": False},\n            ],\n        )\n\n        result = ls_url(\n            str(cloud / \"dir\"), fs_config=cloud.config, recursive=True, maxdepth=0\n        )\n        match_files(\n            fs,\n            result,\n            [{\"path\": (cloud / \"dir\").fs_path, \"isdir\": False}],\n        )\n\n        result = ls_url(\n            str(cloud / \"dir\"), fs_config=cloud.config, recursive=True, maxdepth=1\n        )\n        match_files(\n            fs,\n            result,\n            [\n                {\"path\": \"foo\", \"isdir\": 
False},\n                {\"path\": \"subdir\", \"isdir\": True},\n            ],\n        )\n\n        result = ls_url(\n            str(cloud / \"dir\"), fs_config=cloud.config, recursive=True, maxdepth=2\n        )\n        match_files(\n            fs,\n            result,\n            [\n                {\"path\": \"foo\", \"isdir\": False},\n                {\"path\": \"subdir/bar\", \"isdir\": False},\n            ],\n        )\n\n    def test_nonexistent(self, cloud):\n        with pytest.raises(URLMissingError):\n            ls_url(str(cloud / \"dir\"), fs_config=cloud.config)\n\n\nclass TestGetUrl:\n    def test_get_file(self, cloud, tmp_dir):\n        cloud.gen({\"foo\": \"foo contents\"})\n\n        Repo.get_url(str(cloud / \"foo\"), \"foo_imported\", fs_config=cloud.config)\n\n        assert (tmp_dir / \"foo_imported\").is_file()\n        assert (tmp_dir / \"foo_imported\").read_text() == \"foo contents\"\n\n    def test_get_dir(self, cloud, tmp_dir):\n        cloud.gen({\"foo\": {\"foo\": \"foo contents\"}})\n        if not (cloud / \"foo\").is_dir():\n            pytest.skip(\"Cannot create directories on this cloud\")\n\n        Repo.get_url(str(cloud / \"foo\"), \"foo_imported\", fs_config=cloud.config)\n\n        assert (tmp_dir / \"foo_imported\").is_dir()\n        assert (tmp_dir / \"foo_imported\" / \"foo\").is_file()\n        assert (tmp_dir / \"foo_imported\" / \"foo\").read_text() == \"foo contents\"\n\n    @pytest.mark.parametrize(\"dname\", [\".\", \"dir\", \"dir/subdir\"])\n    def test_get_url_to_dir(self, cloud, tmp_dir, dname):\n        cloud.gen({\"src\": {\"foo\": \"foo contents\"}})\n        if not (cloud / \"src\").is_dir():\n            pytest.skip(\"Cannot create directories on this cloud\")\n        tmp_dir.gen({\"dir\": {\"subdir\": {}}})\n\n        Repo.get_url(str(cloud / \"src\" / \"foo\"), dname, fs_config=cloud.config)\n\n        assert (tmp_dir / dname).is_dir()\n        assert (tmp_dir / dname / \"foo\").read_text() == 
\"foo contents\"\n\n    def test_get_url_nonexistent(self, cloud):\n        with pytest.raises(URLMissingError):\n            Repo.get_url(str(cloud / \"nonexistent\"), fs_config=cloud.config)\n\n\nclass TestToRemote:\n    def test_add_to_remote(self, tmp_dir, dvc, remote, workspace):\n        workspace.gen(\"foo\", \"foo\")\n\n        url = \"remote://workspace/foo\"\n        [stage] = dvc.add(url, to_remote=True)\n\n        assert not (tmp_dir / \"foo\").exists()\n        assert (tmp_dir / \"foo.dvc\").exists()\n\n        assert len(stage.deps) == 0\n        assert len(stage.outs) == 1\n\n        hash_info = stage.outs[0].hash_info\n        meta = stage.outs[0].meta\n        assert hash_info.name == \"md5\"\n        assert hash_info.value == \"acbd18db4cc2f85cedef654fccc4a4d8\"\n        assert (\n            remote / \"files\" / \"md5\" / \"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\"\n        ).read_text() == \"foo\"\n        assert meta.size == len(\"foo\")\n\n    def test_import_url_to_remote_file(self, tmp_dir, dvc, workspace, remote):\n        workspace.gen(\"foo\", \"foo\")\n\n        url = \"remote://workspace/foo\"\n        stage = dvc.imp_url(url, to_remote=True)\n\n        assert stage.deps[0].hash_info.value is not None\n        assert not (tmp_dir / \"foo\").exists()\n        assert (tmp_dir / \"foo.dvc\").exists()\n\n        assert len(stage.deps) == 1\n        assert stage.deps[0].def_path == url\n        assert len(stage.outs) == 1\n\n        hash_info = stage.outs[0].hash_info\n        assert hash_info.name == \"md5\"\n        assert hash_info.value == \"acbd18db4cc2f85cedef654fccc4a4d8\"\n        assert (\n            remote / \"files\" / \"md5\" / \"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\"\n        ).read_text() == \"foo\"\n        assert stage.outs[0].meta.size == len(\"foo\")\n\n    def test_import_url_to_remote_dir(self, tmp_dir, dvc, workspace, remote):\n        import json\n\n        workspace.gen(\n            {\n                \"data\": 
{\n                    \"foo\": \"foo\",\n                    \"bar\": \"bar\",\n                    \"sub_dir\": {\"baz\": \"sub_dir/baz\"},\n                }\n            }\n        )\n\n        url = \"remote://workspace/data\"\n        stage = dvc.imp_url(url, to_remote=True)\n\n        assert not (tmp_dir / \"data\").exists()\n        assert (tmp_dir / \"data.dvc\").exists()\n\n        assert len(stage.deps) == 1\n        assert stage.deps[0].def_path == url\n        assert len(stage.outs) == 1\n\n        hash_info = stage.outs[0].hash_info\n        assert hash_info.name == \"md5\"\n        assert hash_info.value == \"55d05978954d1b2cd7b06aedda9b9e43.dir\"\n        file_parts = json.loads(\n            (\n                remote / \"files\" / \"md5\" / \"55\" / \"d05978954d1b2cd7b06aedda9b9e43.dir\"\n            ).read_text()\n        )\n\n        assert len(file_parts) == 3\n        assert {file_part[\"relpath\"] for file_part in file_parts} == {\n            \"foo\",\n            \"bar\",\n            \"sub_dir/baz\",\n        }\n\n        for file_part in file_parts:\n            md5 = file_part[\"md5\"]\n            assert (\n                remote / \"files\" / \"md5\" / md5[:2] / md5[2:]\n            ).read_text() == file_part[\"relpath\"]\n"
  },
  {
    "path": "dvc/types.py",
    "content": "from typing import TYPE_CHECKING, Any, AnyStr, Union\n\nif TYPE_CHECKING:\n    from os import PathLike\n\nStrPath = Union[str, \"PathLike[str]\"]\nBytesPath = Union[bytes, \"PathLike[bytes]\"]\nGenericPath = Union[AnyStr, \"PathLike[AnyStr]\"]\nStrOrBytesPath = Union[str, bytes, \"PathLike[str]\", \"PathLike[bytes]\"]\n\nTargetType = Union[list[str], str]\nDictStrAny = dict[str, Any]\n"
  },
  {
    "path": "dvc/ui/__init__.py",
    "content": "from collections.abc import Iterable, Iterator, Sequence\nfrom contextlib import contextmanager, nullcontext\nfrom typing import TYPE_CHECKING, Any, Callable, Optional, TextIO, Union\n\nimport colorama\n\nfrom dvc.utils.objects import cached_property\n\nif TYPE_CHECKING:\n    from rich.console import Console as RichConsole\n    from rich.console import JustifyMethod, OverflowMethod\n    from rich.status import Status\n    from rich.style import Style\n    from rich.text import Text as RichText\n\n    from dvc.progress import Tqdm\n    from dvc.types import StrPath\n    from dvc.ui.table import Headers, Styles, TableData\n\n\n@contextmanager\ndef disable_colorama():\n    import sys\n\n    colorama.deinit()\n    try:\n        yield\n    finally:\n        if sys.stdout:\n            sys.stdout.flush()\n        if sys.stderr:\n            sys.stderr.flush()\n        colorama.reinit()\n\n\nclass Formatter:\n    def __init__(\n        self, theme: Optional[dict] = None, defaults: Optional[dict] = None\n    ) -> None:\n        from collections import defaultdict\n\n        theme = theme or {\n            \"success\": {\"color\": \"green\", \"style\": \"bold\"},\n            \"warn\": {\"color\": \"yellow\"},\n            \"error\": {\"color\": \"red\", \"style\": \"bold\"},\n        }\n        self.theme = defaultdict(lambda: defaults or {}, theme)\n\n    def format(self, message: str, style: Optional[str] = None, **kwargs) -> str:\n        from dvc.utils import colorize\n\n        return colorize(message, **self.theme[style])\n\n\nclass Console:\n    def __init__(\n        self, formatter: Optional[Formatter] = None, enable: bool = False\n    ) -> None:\n        from contextvars import ContextVar\n\n        self.formatter: Formatter = formatter or Formatter()\n        self._enabled: bool = enable\n        self._paginate: ContextVar[bool] = ContextVar(\"_paginate\", default=False)\n\n    def enable(self) -> None:\n        self._enabled = True\n\n    def 
success(self, message: str) -> None:\n        self.write(message, style=\"success\")\n\n    def error(self, message: str) -> None:\n        self.error_write(message, style=\"error\")\n\n    def warn(self, message: str) -> None:\n        self.error_write(message, style=\"warn\")\n\n    def error_write(\n        self,\n        *objects: Any,\n        style: Optional[str] = None,\n        sep: Optional[str] = None,\n        end: Optional[str] = None,\n        styled: bool = False,\n        force: bool = True,\n    ) -> None:\n        return self.write(\n            *objects,\n            style=style,\n            sep=sep,\n            end=end,\n            stderr=True,\n            force=force,\n            styled=styled,\n        )\n\n    def write_json(\n        self,\n        data: Any,\n        indent: Optional[int] = None,\n        highlight: Optional[bool] = None,\n        stderr: bool = False,\n        skip_keys: bool = False,\n        ensure_ascii: bool = True,\n        check_circular: bool = True,\n        allow_nan: bool = True,\n        default: Optional[Callable[[Any], Any]] = None,\n        sort_keys: bool = False,\n    ) -> None:\n        if highlight is None:\n            highlight = self.isatty()\n        if indent is None and self.isatty():\n            indent = 2\n\n        from rich.json import JSON\n\n        json = JSON.from_data(\n            data=data,\n            indent=indent,\n            highlight=bool(highlight),\n            skip_keys=skip_keys,\n            ensure_ascii=ensure_ascii,\n            check_circular=check_circular,\n            allow_nan=allow_nan,\n            default=default,\n            sort_keys=sort_keys,\n        )\n        if not highlight:\n            import os\n\n            # we don't need colorama to try to strip ansi codes\n            # when highlighting is disabled\n            ctx = nullcontext() if \"DVC_TEST\" in os.environ else disable_colorama()\n            with ctx:\n                return 
self.write(json.text, stderr=stderr)\n        return self.rich_print(json, stderr=stderr, soft_wrap=True)\n\n    def rich_print(  # noqa: PLR0913\n        self,\n        *objects: Any,\n        sep: str = \" \",\n        end: str = \"\\n\",\n        stderr: bool = False,\n        style: Optional[Union[str, \"Style\"]] = None,\n        justify: Optional[\"JustifyMethod\"] = None,\n        overflow: Optional[\"OverflowMethod\"] = None,\n        no_wrap: Optional[bool] = None,\n        emoji: Optional[bool] = None,\n        markup: Optional[bool] = None,\n        highlight: Optional[bool] = None,\n        width: Optional[int] = None,\n        height: Optional[int] = None,\n        crop: bool = True,\n        soft_wrap: Optional[bool] = None,\n        new_line_start: bool = False,\n    ) -> None:\n        if stderr:\n            console = self.error_console\n        else:\n            console = self.rich_console\n        return console.print(\n            *objects,\n            sep=sep,\n            end=end,\n            style=style,\n            justify=justify,\n            overflow=overflow,\n            no_wrap=no_wrap,\n            emoji=emoji,\n            markup=markup,\n            highlight=highlight,\n            width=width,\n            height=height,\n            crop=crop,\n            soft_wrap=soft_wrap,\n            new_line_start=new_line_start,\n        )\n\n    def write(\n        self,\n        *objects: Any,\n        style: Optional[str] = None,\n        sep: Optional[str] = None,\n        end: Optional[str] = None,\n        stderr: bool = False,\n        force: bool = False,\n        styled: bool = False,\n        file: Optional[TextIO] = None,\n    ) -> None:\n        import sys\n\n        from dvc.progress import Tqdm\n\n        sep = \" \" if sep is None else sep\n        end = \"\\n\" if end is None else end\n        if not self._enabled and not force:\n            return None\n\n        file = file or (sys.stderr if stderr else sys.stdout)\n 
       with Tqdm.external_write_mode(file=file):\n            # if we are inside pager context, send the output to rich's buffer\n            if styled or self._paginate.get():\n                if styled:\n                    return self.rich_print(*objects, sep=sep, end=end, stderr=stderr)\n                return self.rich_print(\n                    sep.join(str(_object) for _object in objects),\n                    style=None,\n                    highlight=False,\n                    emoji=False,\n                    markup=False,\n                    no_wrap=True,\n                    overflow=\"ignore\",\n                    crop=False,\n                    sep=sep,\n                    end=end,\n                    stderr=stderr,\n                )\n\n            values = (self.formatter.format(obj, style) for obj in objects)\n            return print(*values, sep=sep, end=end, file=file)\n\n    @property\n    def rich_text(self) -> \"type[RichText]\":\n        from rich.text import Text\n\n        return Text\n\n    @staticmethod\n    def progress(*args, **kwargs) -> \"Tqdm\":\n        from dvc.progress import Tqdm\n\n        return Tqdm(*args, **kwargs)\n\n    @contextmanager\n    def pager(self, styles: bool = True) -> Iterator[None]:\n        from .pager import DvcPager\n\n        tok = self._paginate.set(True)\n        try:\n            with self.rich_console.pager(pager=DvcPager(), styles=styles):\n                yield\n        finally:\n            self._paginate.reset(tok)\n\n    def prompt(\n        self,\n        text: str,\n        choices: Optional[Iterable[str]] = None,\n        password: bool = False,\n    ) -> Optional[str]:\n        while True:\n            try:\n                response = self.rich_console.input(\n                    text + \" \", markup=False, password=password\n                )\n            except EOFError:\n                return None\n\n            answer = response.lower()\n            if not choices:\n                
return answer\n\n            if answer in choices:\n                return answer\n\n            self.write(f\"Your response must be one of: {choices}. Please try again.\")\n\n    def confirm(self, statement: str) -> bool:\n        \"\"\"Ask the user for confirmation about the specified statement.\n\n        Args:\n            statement: statement to ask the user confirmation about.\n        \"\"\"\n        text = f\"{statement} [y/n]:\"\n        answer = self.prompt(text, choices=[\"yes\", \"no\", \"y\", \"n\"])\n        if not answer:\n            return False\n        return answer.startswith(\"y\")\n\n    @cached_property\n    def rich_console(self) -> \"RichConsole\":\n        \"\"\"rich_console is only set to stdout for now.\"\"\"\n        from rich import console\n\n        return console.Console()\n\n    @cached_property\n    def error_console(self) -> \"RichConsole\":\n        from rich import console\n\n        return console.Console(stderr=True)\n\n    def table(\n        self,\n        data: \"TableData\",\n        headers: Optional[\"Headers\"] = None,\n        markdown: bool = False,\n        rich_table: bool = False,\n        force: bool = True,\n        pager: bool = False,\n        header_styles: Optional[Union[dict[str, \"Styles\"], Sequence[\"Styles\"]]] = None,\n        row_styles: Optional[Sequence[\"Styles\"]] = None,\n        borders: Union[bool, str] = False,\n        colalign: Optional[tuple[str, ...]] = None,\n    ) -> None:\n        from dvc.ui import table as t\n\n        if not data and not markdown:\n            return None\n\n        if not markdown and rich_table:\n            if force or self._enabled:\n                return t.rich_table(\n                    self,\n                    data,\n                    headers,\n                    pager=pager,\n                    header_styles=header_styles,\n                    row_styles=row_styles,\n                    borders=borders,\n                )\n\n            return 
None\n\n        return t.plain_table(\n            self,\n            data,\n            headers,\n            markdown=markdown,\n            pager=pager,\n            force=force,\n            colalign=colalign,\n        )\n\n    def status(self, status: str, **kwargs: Any) -> \"Status\":\n        return self.error_console.status(status, **kwargs)\n\n    @staticmethod\n    def isatty() -> bool:\n        import sys\n\n        from dvc import utils\n\n        return utils.isatty(sys.stdout)\n\n    def open_browser(self, file: \"StrPath\") -> int:\n        import webbrowser\n        from pathlib import Path\n        from platform import uname\n\n        from dvc.utils import relpath\n\n        path = Path(file).resolve()\n        url = relpath(path) if \"microsoft\" in uname().release.lower() else path.as_uri()\n\n        opened = webbrowser.open(url)\n\n        if not opened:\n            ui.error_write(f\"Failed to open {url}. Please try opening it manually.\")\n            return 1\n\n        return 0\n\n\nui = Console()\n\n\nif __name__ == \"__main__\":\n    ui.enable()\n\n    ui.write(\"No default remote set\")\n    ui.success(\"Everything is up to date.\")\n    ui.warn(\"Run queued experiments will be removed.\")\n    ui.error(\"too few arguments.\")\n\n    ui.table([(\"scores.json\", \"0.5674\")], headers=[\"Path\", \"auc\"])\n    ui.table([(\"scores.json\", \"0.5674\")], headers=[\"Path\", \"auc\"], markdown=True)\n"
  },
  {
    "path": "dvc/ui/_rich_progress.py",
    "content": "from funcy import split\nfrom rich.progress import (\n    BarColumn,\n    DownloadColumn,\n    MofNCompleteColumn,\n    Progress,\n    SpinnerColumn,\n    TextColumn,\n    TimeElapsedColumn,\n    TimeRemainingColumn,\n    TransferSpeedColumn,\n)\n\n\nclass MofNCompleteColumnWithUnit(MofNCompleteColumn):\n    \"\"\"Requires `task.fields[unit]` to be set.\"\"\"\n\n    def render(self, task):\n        ret = super().render(task)\n        unit = task.fields.get(\"unit\")\n        return ret.append(f\" {unit}\") if unit else ret\n\n\nclass RichProgress(Progress):\n    def clear_task(self, task):\n        try:\n            self.remove_task(task)\n        except KeyError:\n            pass\n\n\nclass RichTransferProgress(RichProgress):\n    SUMMARY_COLS = (\n        TextColumn(\"[magenta]{task.description}[bold green]\"),\n        MofNCompleteColumnWithUnit(),\n        TimeElapsedColumn(),\n    )\n    TRANSFER_COLS = (\n        TextColumn(\"  [blue]{task.description}\"),\n        BarColumn(),\n        DownloadColumn(),\n        TransferSpeedColumn(),\n        TextColumn(\"eta\"),\n        TimeRemainingColumn(),\n    )\n\n    def get_renderables(self):\n        summary_tasks, other_tasks = split(\n            lambda task: task.fields.get(\"progress_type\") == \"summary\",\n            self.tasks,\n        )\n        self.columns = self.SUMMARY_COLS\n        yield self.make_tasks_table(summary_tasks)\n        self.columns = self.TRANSFER_COLS\n        yield self.make_tasks_table(other_tasks)\n\n\nclass DbDownloadProgress(RichProgress):\n    PROGRESS_COLS = (\n        SpinnerColumn(),\n        TextColumn(\"[progress.description]{task.description}\"),\n        TextColumn(\"[progress.download]{task.completed:,} rows\"),\n        TextColumn(\"to [repr.filename]{task.fields[output]}\"),\n    )\n    STATUS_COLS = (\n        SpinnerColumn(),\n        TextColumn(\"[progress.description]{task.description}\"),\n        TextColumn(\"to 
[repr.filename]{task.fields[output]}\"),\n    )\n\n    def get_renderables(self):\n        if self.tasks:\n            (task, *_) = self.tasks\n            cols = self.PROGRESS_COLS if task.completed else self.STATUS_COLS\n            self.columns = cols[1:] if task.finished else cols\n        yield self.make_tasks_table(self.tasks)\n"
  },
  {
    "path": "dvc/ui/pager.py",
    "content": "\"\"\"Draws DAG in ASCII.\"\"\"\n\nimport os\nimport pydoc\n\nfrom rich.pager import Pager\n\nfrom dvc.env import DVC_PAGER\nfrom dvc.log import logger\nfrom dvc.utils import format_link\n\nlogger = logger.getChild(__name__)\n\n\nDEFAULT_PAGER = \"less\"\nLESS = \"LESS\"\nPAGER_ENV = \"PAGER\"\n\n\ndef prepare_default_pager(\n    clear_screen: bool = False,\n    quit_if_one_screen: bool = True,\n    ansi_escapes: bool = True,\n    chop_long_lines: bool = True,\n    no_init: bool = True,\n    no_tilde: bool = False,\n) -> str:\n    args = [DEFAULT_PAGER]\n    if clear_screen:\n        args.append(\"--clear-screen\")  # -c\n    if quit_if_one_screen:\n        args.append(\"--quit-if-one-screen\")  # -F\n    if ansi_escapes:\n        args.append(\"--RAW-CONTROL-CHARS\")  # -R\n    if chop_long_lines:\n        args.append(\"--chop-long-lines\")  # -S\n    if no_init:\n        args.append(\"--no-init\")  # -X\n    if no_tilde:\n        args.append(\"--tilde\")  # -~\n\n    return \" \".join(args)\n\n\ndef make_pager(cmd=None):\n    def _pager(text):\n        assert cmd\n        return pydoc.tempfilepager(pydoc.plain(text), cmd)\n\n    return _pager if cmd else pydoc.plainpager\n\n\ndef find_pager():\n    from . import Console\n\n    if not Console.isatty():\n        return None\n\n    pager = os.getenv(DVC_PAGER)\n    if not pager:\n        pager = os.getenv(PAGER_ENV)\n    if not pager:\n        ret = os.system(f\"({DEFAULT_PAGER}) 2>{os.devnull}\")  # noqa: S605\n        if ret != 0:\n            logger.warning(\n                \"Unable to find `less` in the PATH. Check out %s for more info.\",\n                format_link(\"https://man.dvc.org/pipeline/show\"),\n            )\n        else:\n            pager = DEFAULT_PAGER\n\n    if pager == DEFAULT_PAGER:\n        # if pager is less (i.e. 
default), regardless of `$LESS`, apply `-RS`.\n        # `-R` is required to render ansi escape sequences for exp show\n        # and, `-S` is required for horizontal scrolling.\n        less_env = bool(os.getenv(LESS))\n        return prepare_default_pager(\n            ansi_escapes=True,\n            chop_long_lines=True,\n            quit_if_one_screen=not less_env,\n            no_init=not less_env,\n        )\n\n    return pager\n\n\ndef pager(text: str) -> None:\n    _pager = find_pager()\n    logger.trace(\"Using pager: '%s'\", _pager)\n    make_pager(_pager)(text)\n\n\nclass DvcPager(Pager):\n    def show(self, content: str) -> None:\n        pager(content)\n"
  },
  {
    "path": "dvc/ui/table.py",
    "content": "from collections import abc\nfrom collections.abc import Iterator, Sequence\nfrom contextlib import ExitStack, contextmanager\nfrom itertools import zip_longest\nfrom typing import TYPE_CHECKING, Optional, Union\n\nfrom dvc.types import DictStrAny\n\nif TYPE_CHECKING:\n    from rich.console import Console as RichConsole\n    from rich.table import Table\n\n    from dvc.ui import Console, RichText\n\nSHOW_MAX_WIDTH = 1024\n\n\nCellT = Union[str, \"RichText\", None]  # RichText is mostly compatible with str\nRow = Sequence[CellT]\nTableData = Sequence[Row]\nHeaders = Sequence[str]\nStyles = DictStrAny\n\n\ndef plain_table(\n    ui: \"Console\",\n    data: TableData,\n    headers: Optional[Headers] = None,\n    markdown: bool = False,\n    pager: bool = False,\n    force: bool = True,\n    colalign: Optional[tuple[str, ...]] = None,\n) -> None:\n    from funcy import nullcontext\n    from tabulate import tabulate\n\n    text: str = tabulate(\n        data,\n        headers if headers is not None else (),\n        tablefmt=\"github\" if markdown else \"plain\",\n        disable_numparse=True,\n        # None will be shown as \"\" by default, overriding\n        missingval=\"-\",\n        colalign=colalign,\n    )\n    if markdown:\n        # NOTE: md table is incomplete without the trailing newline\n        text += \"\\n\"\n\n    cm = ui.pager() if pager else nullcontext()\n    with cm:\n        ui.write(text, force=force)\n\n\n@contextmanager\ndef console_width(table: \"Table\", console: \"RichConsole\", val: int) -> Iterator[None]:\n    # NOTE: rich does not have native support for unlimited width\n    # via pager. 
we override rich table compression by setting\n    # console width to the full width of the table\n\n    console_options = console.options\n    original = console_options.max_width\n    con_width = console._width\n\n    try:\n        console_options.max_width = val\n        measurement = table.__rich_measure__(console, console_options)\n        console._width = measurement.maximum\n\n        yield\n    finally:\n        console_options.max_width = original\n        console._width = con_width\n\n\ndef rich_table(\n    ui: \"Console\",\n    data: TableData,\n    headers: Optional[Headers] = None,\n    pager: bool = False,\n    header_styles: Optional[Union[dict[str, Styles], Sequence[Styles]]] = None,\n    row_styles: Optional[Sequence[Styles]] = None,\n    borders: Union[bool, str] = False,\n) -> None:\n    from rich import box\n\n    from dvc.utils.table import Table\n\n    border_style = {\n        True: box.HEAVY_HEAD,  # is a default in rich,\n        False: None,\n        \"simple\": box.SIMPLE,\n        \"minimal\": box.MINIMAL,\n        \"horizontals\": box.HORIZONTALS,\n    }\n\n    table = Table(box=border_style[borders])\n\n    if isinstance(header_styles, abc.Sequence):\n        hs: dict[str, Styles] = dict(zip(headers or [], header_styles))\n    else:\n        hs = header_styles or {}\n\n    for header in headers or []:\n        table.add_column(header, **hs.get(header, {}))\n\n    rs: Sequence[Styles] = row_styles or []\n    for row, style in zip_longest(data, rs):\n        table.add_row(*row, **(style or {}))\n\n    stack = ExitStack()\n    if pager:\n        stack.enter_context(console_width(table, ui.rich_console, SHOW_MAX_WIDTH))\n        stack.enter_context(ui.pager())\n\n    with stack:\n        ui.write(table, styled=True)\n        return\n"
  },
  {
    "path": "dvc/updater.py",
    "content": "import os\nimport time\nfrom typing import TYPE_CHECKING, Optional\n\nfrom packaging import version\n\nfrom dvc import PKG, __version__\nfrom dvc.env import DVC_UPDATER_ENDPOINT\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from dvc.ui import RichText\n\nlogger = logger.getChild(__name__)\n\n\nclass Updater:\n    URL = \"https://updater.dvc.org\"\n    UPDATER_FILE = \"updater\"\n    TIMEOUT = 24 * 60 * 60  # every day\n    TIMEOUT_GET = 10\n\n    def __init__(self, tmp_dir, friendly=False, hardlink_lock=False):\n        from dvc.lock import make_lock\n\n        self.updater_file = os.path.join(tmp_dir, self.UPDATER_FILE)\n        self.lock = make_lock(\n            self.updater_file + \".lock\",\n            tmp_dir=tmp_dir,\n            friendly=friendly,\n            hardlink_lock=hardlink_lock,\n        )\n        self.current = version.parse(__version__).base_version\n\n    def _is_outdated_file(self):\n        ctime = os.path.getmtime(self.updater_file)\n        outdated = time.time() - ctime >= self.TIMEOUT\n        if outdated:\n            logger.debug(\"'%s' is outdated\", self.updater_file)\n        return outdated\n\n    def _with_lock(self, func, action):\n        from dvc.lock import LockError\n\n        try:\n            with self.lock:\n                func()\n        except LockError:\n            logger.trace(\"\", exc_info=True)\n            logger.debug(\n                \"Failed to acquire '%s' before %s updates\",\n                self.lock.lockfile,\n                action,\n            )\n\n    def check(self):\n        from dvc.utils import env2bool\n\n        if (\n            os.getenv(\"CI\")\n            or env2bool(\"DVC_TEST\")\n            or PKG == \"snap\"\n            or not self.is_enabled()\n        ):\n            return\n\n        self._with_lock(self._check, \"checking\")\n\n    def _check(self):\n        if not os.path.exists(self.updater_file) or self._is_outdated_file():\n            self.fetch()\n   
         return\n\n        with open(self.updater_file, encoding=\"utf-8\") as fobj:\n            import json\n\n            try:\n                info = json.load(fobj)\n                latest = info[\"version\"]\n            except Exception as e:  # noqa: BLE001\n                logger.trace(\"\", exc_info=True)\n                logger.debug(\"'%s' is not a valid json: %s\", self.updater_file, e)\n                self.fetch()\n                return\n\n        if version.parse(self.current) < version.parse(latest):\n            self._notify(latest)\n\n    def fetch(self, detach=True):\n        from dvc.daemon import daemon\n\n        if detach:\n            daemon([\"updater\"])\n            return\n\n        self._with_lock(self._get_latest_version, \"fetching\")\n\n    def _get_latest_version(self):\n        import json\n\n        import requests\n        from requests.exceptions import RequestException\n\n        url = os.environ.get(DVC_UPDATER_ENDPOINT, self.URL)\n        logger.debug(\"Checking updates in %s\", url)\n        try:\n            resp = requests.get(url, timeout=self.TIMEOUT_GET)\n            info = resp.json()\n        except RequestException as exc:\n            logger.trace(\"\", exc_info=True)\n            logger.debug(\"Failed to retrieve latest version: %s\", exc)\n            return\n\n        logger.trace(\"received payload: %s (status=%s)\", info, resp.status_code)\n        with open(self.updater_file, \"w+\", encoding=\"utf-8\") as fobj:\n            logger.trace(\"Saving latest version info to %s\", self.updater_file)\n            json.dump(info, fobj)\n\n    def _notify(self, latest: str, pkg: Optional[str] = PKG) -> None:\n        from dvc.ui import ui\n\n        if not ui.isatty():\n            return None\n\n        message = self._get_message(latest, pkg=pkg)\n        return ui.error_write(message, styled=True)\n\n    def _get_message(\n        self,\n        latest: str,\n        current: Optional[str] = None,\n        color: 
str = \"yellow\",\n        pkg: Optional[str] = None,\n    ) -> \"RichText\":\n        from dvc.ui import ui\n\n        current = current or self.current\n        update_message = ui.rich_text.from_markup(\n            f\"You are using dvc version [bold]{current}[/]; \"\n            f\"however, version [bold]{latest}[/] is available.\"\n        )\n        instruction = ui.rich_text.from_markup(self._get_update_instructions(pkg=pkg))\n        return ui.rich_text.assemble(\n            \"\\n\", update_message, \"\\n\", instruction, style=color\n        )\n\n    @staticmethod\n    def _get_update_instructions(pkg: Optional[str] = None) -> str:\n        if pkg in (\"osxpkg\", \"exe\", \"binary\"):\n            return (\n                \"To upgrade, uninstall dvc and reinstall from [blue]https://dvc.org[/].\"\n            )\n\n        instructions = {\n            \"pip\": \"pip install --upgrade dvc\",\n            \"rpm\": \"yum update dvc\",\n            \"brew\": \"brew upgrade dvc\",\n            \"deb\": \"apt-get install --only-upgrade dvc\",\n            \"conda\": \"conda update dvc\",\n            \"choco\": \"choco upgrade dvc\",\n        }\n\n        if pkg not in instructions:\n            return (\n                \"Find the latest release at \"\n                \"[blue]https://github.com/treeverse/dvc/releases/latest[/].\"\n            )\n\n        instruction = instructions[pkg]\n        return f\"To upgrade, run '{instruction}'.\"\n\n    def is_enabled(self):\n        from dvc.config import Config, to_bool\n\n        enabled = to_bool(\n            Config.from_cwd(validate=False).get(\"core\", {}).get(\"check_update\", \"true\")\n        )\n        logger.debug(\"Check for update is %sabled.\", \"en\" if enabled else \"dis\")\n        return enabled\n\n\ndef notify_updates():\n    from contextlib import suppress\n\n    from dvc.repo import NotDvcRepoError, Repo\n\n    with suppress(NotDvcRepoError), Repo() as repo:\n        hardlink_lock = 
repo.config[\"core\"].get(\"hardlink_lock\", False)\n        updater = Updater(repo.tmp_dir, hardlink_lock=hardlink_lock)\n        updater.check()\n"
  },
  {
    "path": "dvc/utils/.gitignore",
    "content": "/build.py\n"
  },
  {
    "path": "dvc/utils/__init__.py",
    "content": "\"\"\"Helpers for other modules.\"\"\"\n\nimport hashlib\nimport json\nimport os\nimport re\nimport sys\nfrom typing import TYPE_CHECKING, Optional\n\nimport colorama\nfrom colorama import AnsiToWin32\n\nif TYPE_CHECKING:\n    from typing import TextIO\n\n\nLARGE_DIR_SIZE = 100\nTARGET_REGEX = re.compile(r\"(?P<path>.*?)(:(?P<name>[^\\\\/:]*))??$\")\n\n\ndef bytes_hash(byts, typ):\n    hasher = getattr(hashlib, typ)(usedforsecurity=False)\n    hasher.update(byts)\n    return hasher.hexdigest()\n\n\ndef dict_filter(d, exclude=()):\n    \"\"\"\n    Exclude specified keys from a nested dict\n    \"\"\"\n    if not exclude or not isinstance(d, (list, dict)):\n        return d\n\n    if isinstance(d, list):\n        return [dict_filter(e, exclude) for e in d]\n\n    return {k: dict_filter(v, exclude) for k, v in d.items() if k not in exclude}\n\n\ndef dict_hash(d, typ, exclude=()):\n    filtered = dict_filter(d, exclude)\n    byts = json.dumps(filtered, sort_keys=True).encode(\"utf-8\")\n    return bytes_hash(byts, typ)\n\n\ndef dict_md5(d, **kwargs):\n    return dict_hash(d, \"md5\", **kwargs)\n\n\ndef dict_sha256(d, **kwargs):\n    return dict_hash(d, \"sha256\", **kwargs)\n\n\ndef _split(list_to_split, chunk_size):\n    return [\n        list_to_split[i : i + chunk_size]\n        for i in range(0, len(list_to_split), chunk_size)\n    ]\n\n\n# NOTE: Check if we are in a bundle\n# https://pythonhosted.org/PyInstaller/runtime-information.html\ndef is_binary():\n    return getattr(sys, \"frozen\", False)\n\n\ndef fix_env(env=None):\n    \"\"\"Fix env variables modified by PyInstaller [1] and pyenv [2].\n    [1] http://pyinstaller.readthedocs.io/en/stable/runtime-information.html\n    [2] https://github.com/pyenv/pyenv/issues/985\n    \"\"\"\n    if env is None:\n        env = os.environ.copy()\n    else:\n        env = env.copy()\n\n    if is_binary():\n        lp_key = \"LD_LIBRARY_PATH\"\n        lp_orig = env.get(lp_key + \"_ORIG\", None)\n        if 
lp_orig is not None:\n            env[lp_key] = lp_orig\n        else:\n            env.pop(lp_key, None)\n\n    # Unlike PyInstaller, pyenv doesn't leave backups of original env vars\n    # when it modifies them. If we look into the shim, pyenv and pyenv-exec,\n    # we can figure out that the PATH is modified like this:\n    #\n    #     PATH=$PYENV_BIN_PATH:${bin_path}:${plugin_bin}:$PATH\n    #\n    # where\n    #\n    #     PYENV_BIN_PATH - might not start with $PYENV_ROOT if we are running\n    #         `system` version of the command, see pyenv-exec source code.\n    #     bin_path - might not start with $PYENV_ROOT as it runs realpath on\n    #         it, but always has `libexec` part in it, see pyenv source code.\n    #     plugin_bin - might contain more than 1 entry, which start with\n    #         $PYENV_ROOT, see pyenv source code.\n    #\n    # Also, we know that whenever pyenv is running, it exports these env vars:\n    #\n    #     PYENV_DIR\n    #     PYENV_HOOK_PATH\n    #     PYENV_VERSION\n    #     PYENV_ROOT\n    #\n    # So having this, we can make a rightful assumption about what parts of the\n    # PATH we need to remove in order to get the original PATH.\n    path = env.get(\"PATH\", \"\")\n    parts = path.split(\":\")\n    bin_path = parts[1] if len(parts) > 2 else \"\"\n    pyenv_dir = env.get(\"PYENV_DIR\")\n    pyenv_hook_path = env.get(\"PYENV_HOOK_PATH\")\n    pyenv_version = env.get(\"PYENV_VERSION\")\n    pyenv_root = env.get(\"PYENV_ROOT\")\n\n    env_matches = all([pyenv_dir, pyenv_hook_path, pyenv_version, pyenv_root])\n\n    bin_path_matches = os.path.basename(bin_path) == \"libexec\"\n\n    # NOTE: we don't support pyenv-win\n    if os.name != \"nt\" and env_matches and bin_path_matches:\n        # removing PYENV_BIN_PATH and bin_path\n        parts = parts[2:]\n\n        if parts:\n            # removing plugin_bin from the left\n            plugin_bin = os.path.join(pyenv_root, \"plugins\")\n            while 
parts[0].startswith(plugin_bin):\n                del parts[0]\n\n        env[\"PATH\"] = \":\".join(parts)\n\n    return env\n\n\ndef colorize(message, color=None, style=None):\n    \"\"\"Returns a message in a specified color.\"\"\"\n    if not color:\n        return message\n\n    styles = {\"dim\": colorama.Style.DIM, \"bold\": colorama.Style.BRIGHT}\n\n    colors = {\n        \"green\": colorama.Fore.GREEN,\n        \"yellow\": colorama.Fore.YELLOW,\n        \"blue\": colorama.Fore.BLUE,\n        \"red\": colorama.Fore.RED,\n        \"magenta\": colorama.Fore.MAGENTA,\n        \"cyan\": colorama.Fore.CYAN,\n    }\n\n    return \"{style}{color}{message}{reset}\".format(\n        style=styles.get(style, \"\"),\n        color=colors.get(color, \"\"),\n        message=message,\n        reset=colorama.Style.RESET_ALL,\n    )\n\n\ndef boxify(message, border_color=None):\n    \"\"\"Put a message inside a box.\n\n    Args:\n        message (unicode): message to decorate.\n        border_color (unicode): name of the color to outline the box with.\n    \"\"\"\n    lines = message.split(\"\\n\")\n    max_width = max(_visual_width(line) for line in lines)\n\n    padding_horizontal = 5\n    padding_vertical = 1\n\n    box_size_horizontal = max_width + (padding_horizontal * 2)\n\n    chars = {\"corner\": \"+\", \"horizontal\": \"-\", \"vertical\": \"|\", \"empty\": \" \"}\n\n    margin = \"{corner}{line}{corner}\\n\".format(\n        corner=chars[\"corner\"], line=chars[\"horizontal\"] * box_size_horizontal\n    )\n\n    padding_lines = [\n        \"{border}{space}{border}\\n\".format(\n            border=colorize(chars[\"vertical\"], color=border_color),\n            space=chars[\"empty\"] * box_size_horizontal,\n        )\n        * padding_vertical\n    ]\n\n    content_lines = [\n        \"{border}{space}{content}{space}{border}\\n\".format(\n            border=colorize(chars[\"vertical\"], color=border_color),\n            space=chars[\"empty\"] * padding_horizontal,\n 
           content=_visual_center(line, max_width),\n        )\n        for line in lines\n    ]\n\n    return \"{margin}{padding}{content}{padding}{margin}\".format(\n        margin=colorize(margin, color=border_color),\n        padding=\"\".join(padding_lines),\n        content=\"\".join(content_lines),\n    )\n\n\ndef _visual_width(line):\n    \"\"\"Get the number of columns required to display a string\"\"\"\n\n    return len(re.sub(AnsiToWin32.ANSI_CSI_RE, \"\", line))\n\n\ndef _visual_center(line, width):\n    \"\"\"Center align string according to it's visual width\"\"\"\n\n    spaces = max(width - _visual_width(line), 0)\n    left_padding = int(spaces / 2)\n    right_padding = spaces - left_padding\n\n    return (left_padding * \" \") + line + (right_padding * \" \")\n\n\ndef relpath(path, start=os.curdir):\n    path = os.path.abspath(os.fspath(path))\n    start = os.path.abspath(os.fspath(start))\n\n    # Windows path on different drive than curdir doesn't have relpath\n    if os.name == \"nt\" and not os.path.commonprefix([start, path]):\n        return path\n\n    return os.path.relpath(path, start)\n\n\ndef as_posix(path: str) -> str:\n    import ntpath\n    import posixpath\n\n    return path.replace(ntpath.sep, posixpath.sep)\n\n\ndef env2bool(var, undefined=False):\n    \"\"\"\n    undefined: return value if env var is unset\n    \"\"\"\n    var = os.getenv(var, None)\n    if var is None:\n        return undefined\n    return bool(re.search(\"1|y|yes|true\", var, flags=re.IGNORECASE))\n\n\ndef resolve_output(inp: str, out: Optional[str], force=False) -> str:\n    from urllib.parse import urlparse\n\n    from dvc.exceptions import FileExistsLocallyError\n\n    name = os.path.basename(os.path.normpath(urlparse(inp).path))\n    if not out:\n        ret = name\n    elif os.path.isdir(out):\n        ret = os.path.join(out, name)\n    else:\n        ret = out\n\n    if os.path.exists(ret) and not force:\n        hint = \"\\nTo override it, re-run with 
'--force'.\"\n        raise FileExistsLocallyError(ret, hint=hint)\n\n    return ret\n\n\ndef resolve_paths(repo, out, always_local=False):\n    from urllib.parse import urlparse\n\n    from dvc.dvcfile import DVC_FILE_SUFFIX\n    from dvc.exceptions import DvcException\n    from dvc.fs import localfs\n\n    from .fs import contains_symlink_up_to\n\n    abspath = os.path.abspath(out)\n    dirname = os.path.dirname(abspath)\n    base = os.path.basename(os.path.normpath(out))\n\n    scheme = urlparse(out).scheme\n\n    if os.name == \"nt\" and scheme == os.path.splitdrive(abspath)[0][0].lower():\n        # urlparse interprets windows drive letters as URL scheme\n        scheme = \"\"\n\n    if scheme or not localfs.isin_or_eq(abspath, repo.root_dir):\n        wdir = os.getcwd()\n    elif contains_symlink_up_to(dirname, repo.root_dir) or (\n        os.path.isdir(abspath) and localfs.is_symlink(abspath)\n    ):\n        msg = (\n            \"Cannot add files inside symlinked directories to DVC. 
\"\n            \"See {} for more information.\"\n        ).format(\n            format_link(\"https://dvc.org/doc/user-guide/troubleshooting#add-symlink\")\n        )\n        raise DvcException(msg)\n    else:\n        wdir = dirname\n        out = base\n\n    if always_local:\n        out = base\n\n    path = os.path.join(wdir, base + DVC_FILE_SUFFIX)\n\n    return (path, wdir, out)\n\n\ndef format_link(link):\n    return \"<{blue}{link}{nc}>\".format(  # noqa: UP032\n        blue=colorama.Fore.CYAN, link=link, nc=colorama.Fore.RESET\n    )\n\n\ndef error_link(name):\n    return format_link(f\"https://error.dvc.org/{name}\")\n\n\ndef parse_target(\n    target: str, default: Optional[str] = None, isa_glob: bool = False\n) -> tuple[Optional[str], Optional[str]]:\n    from dvc.dvcfile import LOCK_FILE, PROJECT_FILE, is_valid_filename\n    from dvc.exceptions import DvcException\n    from dvc.parsing import JOIN\n\n    if not target:\n        return None, None\n\n    default = default or PROJECT_FILE\n    if isa_glob:\n        path, _, glob = target.rpartition(\":\")\n        return path or default, glob or None\n\n    # look for first \"@\", so as not to assume too much about stage name\n    # eg: it might contain \":\" in a generated stages from dict which might\n    # affect further parsing with the regex.\n    group, _, key = target.partition(JOIN)\n    match = TARGET_REGEX.match(group)\n\n    if not match:\n        return target, None\n\n    path, name = (match.group(\"path\"), match.group(\"name\"))\n\n    if name and key:\n        name += f\"{JOIN}{key}\"\n\n    if path:\n        if os.path.basename(path) == LOCK_FILE:\n            raise DvcException(\n                \"Did you mean: `{}`?\".format(target.replace(\".lock\", \".yaml\", 1))\n            )\n        if not name:\n            ret = (target, None)\n            return ret if is_valid_filename(target) else ret[::-1]\n    return path or default, name\n\n\ndef glob_targets(targets, glob=True, 
recursive=True):\n    from dvc.exceptions import DvcException\n\n    if not glob:\n        return targets\n\n    from glob import iglob\n\n    results = [\n        exp_target\n        for target in targets\n        for exp_target in iglob(target, recursive=recursive)\n    ]\n\n    if not results:\n        msg = f\"Glob {targets} has no matches.\"\n        raise DvcException(msg)\n\n    return results\n\n\ndef error_handler(func):\n    def wrapper(*args, **kwargs):\n        onerror = kwargs.get(\"onerror\")\n        result = {}\n\n        try:\n            vals = func(*args, **kwargs)\n            if vals:\n                result[\"data\"] = vals\n        except Exception as e:  # noqa: BLE001\n            if onerror is not None:\n                onerror(result, e, **kwargs)\n        return result\n\n    return wrapper\n\n\ndef errored_revisions(rev_data: dict) -> list:\n    from dvc.utils.collections import nested_contains\n\n    result = []\n    for revision, data in rev_data.items():\n        if nested_contains(data, \"error\"):\n            result.append(revision)\n    return result\n\n\ndef isatty(stream: \"Optional[TextIO]\") -> bool:\n    if stream is None:\n        return False\n    return stream.isatty()\n"
  },
  {
    "path": "dvc/utils/cli_parse.py",
    "content": "from collections import defaultdict\nfrom collections.abc import Iterable\n\n\ndef parse_params(path_params: Iterable[str]) -> list[dict[str, list[str]]]:\n    \"\"\"Normalizes the shape of params from the CLI to dict.\"\"\"\n    from dvc.dependency.param import ParamsDependency\n\n    ret: dict[str, list[str]] = defaultdict(list)\n    for path_param in path_params:\n        path, _, params_str = path_param.rpartition(\":\")\n        # remove empty strings from params, on condition such as `-p \"file1:\"`\n        params = filter(bool, params_str.split(\",\"))\n        if not path:\n            path = ParamsDependency.DEFAULT_PARAMS_FILE\n        ret[path].extend(params)\n    return [{path: params} for path, params in ret.items()]\n\n\ndef to_path_overrides(path_params: Iterable[str]) -> dict[str, list[str]]:\n    \"\"\"Group overrides by path\"\"\"\n    from dvc.dependency.param import ParamsDependency\n\n    path_overrides = defaultdict(list)\n    for path_param in path_params:\n        path_and_name = path_param.partition(\"=\")[0]\n        if \":\" not in path_and_name:\n            override = path_param\n            path = ParamsDependency.DEFAULT_PARAMS_FILE\n        else:\n            path, _, override = path_param.partition(\":\")\n\n        path_overrides[path].append(override)\n\n    return dict(path_overrides)\n"
  },
  {
    "path": "dvc/utils/collections.py",
    "content": "from collections.abc import Iterable, Mapping\nfrom typing import Union, no_type_check\n\n\n@no_type_check\ndef apply_diff(src, dest):  # noqa: C901\n    \"\"\"Recursively apply changes from src to dest.\n\n    Preserves dest type and hidden info in dest structure,\n    like ruamel.yaml leaves when parses files. This includes comments,\n    ordering and line foldings.\n\n    Used in Stage load/dump cycle to preserve comments and custom formatting.\n    \"\"\"\n    Seq = (list, tuple)  # noqa: N806\n    Container = (Mapping, list, tuple)  # noqa: N806\n\n    def is_same_type(a, b):\n        return any(\n            isinstance(a, t) and isinstance(b, t) for t in [str, Mapping, Seq, bool]\n        )\n\n    if isinstance(src, Mapping) and isinstance(dest, Mapping):\n        for key, value in src.items():\n            if isinstance(value, Container) and is_same_type(value, dest.get(key)):\n                apply_diff(value, dest[key])\n            elif key not in dest or value != dest[key]:\n                dest[key] = value\n        for key in set(dest) - set(src):\n            del dest[key]\n    elif isinstance(src, Seq) and isinstance(dest, Seq):\n        if len(src) != len(dest):\n            dest[:] = src\n        else:\n            for i, value in enumerate(src):\n                if isinstance(value, Container) and is_same_type(value, dest[i]):\n                    apply_diff(value, dest[i])\n                elif value != dest[i]:\n                    dest[i] = value\n    else:\n        raise AssertionError(  # noqa: TRY004\n            f\"Can't apply diff from {type(src).__name__} to {type(dest).__name__}\"\n        )\n\n\ndef to_omegaconf(item):\n    \"\"\"\n    Some parsers return custom classes (i.e. 
parse_yaml_for_update)\n    that can mess up with omegaconf logic.\n    Cast the custom classes to Python primitives.\n    \"\"\"\n    if isinstance(item, dict):\n        return {k: to_omegaconf(v) for k, v in item.items()}\n    if isinstance(item, list):\n        return [to_omegaconf(x) for x in item]\n    return item\n\n\ndef remove_missing_keys(src, to_update):\n    keys = list(src.keys())\n    for key in keys:\n        if key not in to_update:\n            del src[key]\n        elif isinstance(src[key], dict):\n            remove_missing_keys(src[key], to_update[key])\n\n    return src\n\n\ndef _merge_item(d, key, value):\n    if key in d:\n        item = d.get(key, None)\n        if isinstance(item, dict) and isinstance(value, dict):\n            merge_dicts(item, value)\n        else:\n            d[key] = value\n    else:\n        d[key] = value\n\n\ndef merge_dicts(src: dict, to_update: dict) -> dict:\n    \"\"\"Recursively merges dictionaries.\n\n    Args:\n        src (dict): source dictionary of parameters\n        to_update (dict): dictionary of parameters to merge into src\n    \"\"\"\n    for key, value in to_update.items():\n        _merge_item(src, key, value)\n    return src\n\n\ndef ensure_list(item: Union[Iterable[str], str, None]) -> list[str]:\n    if item is None:\n        return []\n    if isinstance(item, str):\n        return [item]\n    return list(item)\n\n\ndef nested_contains(dictionary: dict, phrase: str) -> bool:\n    for key, val in dictionary.items():\n        if key == phrase and val:\n            return True\n\n        if isinstance(val, dict) and nested_contains(val, phrase):\n            return True\n    return False\n"
  },
  {
    "path": "dvc/utils/diff.py",
    "content": "import json\nfrom collections import defaultdict\n\nfrom .flatten import flatten\n\n\ndef _parse(raw):\n    if raw is None or isinstance(raw, (dict, list, int, float)):\n        return raw\n\n    assert isinstance(raw, str)\n    try:\n        return json.loads(raw)\n    except json.JSONDecodeError:\n        return raw\n\n\ndef _diff_vals(old, new, with_unchanged):\n    if isinstance(new, list) and isinstance(old, list) and len(old) == len(new) == 1:\n        return _diff_vals(old[0], new[0], with_unchanged)\n\n    if not with_unchanged and old == new:\n        return {}\n\n    res = {\"old\": old, \"new\": new}\n    if isinstance(new, (int, float)) and isinstance(old, (int, float)):\n        res[\"diff\"] = new - old\n\n    return res\n\n\ndef _flatten(d):\n    if not d:\n        return defaultdict(lambda: None)\n\n    if isinstance(d, dict):\n        return defaultdict(lambda: None, flatten(d))\n\n    return defaultdict(lambda: \"unable to parse\")\n\n\ndef _diff_dicts(old_dict, new_dict, with_unchanged):\n    new = _flatten(new_dict)\n    old = _flatten(old_dict)\n\n    res: dict[str, dict] = defaultdict(dict)\n\n    xpaths = set(old.keys())\n    xpaths.update(set(new.keys()))\n    for xpath in xpaths:\n        old_val = old[xpath]\n        new_val = new[xpath]\n        val_diff = _diff_vals(old_val, new_val, with_unchanged)\n        if val_diff:\n            res[xpath] = val_diff\n    return dict(res)\n\n\ndef _diff(old_raw, new_raw, with_unchanged):\n    old = _parse(old_raw)\n    new = _parse(new_raw)\n\n    if isinstance(new, dict) or isinstance(old, dict):\n        return _diff_dicts(old, new, with_unchanged)\n\n    val_diff = _diff_vals(old, new, with_unchanged)\n    if val_diff:\n        return {\"\": val_diff}\n\n    return {}\n\n\ndef diff(old, new, with_unchanged=False):\n    paths = set(old.keys())\n    paths.update(set(new.keys()))\n\n    res: dict[str, dict] = defaultdict(dict)\n    for path in paths:\n        path_diff = _diff(\n     
       old.get(path, {}).get(\"data\", {}),\n            new.get(path, {}).get(\"data\", {}),\n            with_unchanged,\n        )\n        if path_diff:\n            res[path] = path_diff\n    return dict(res)\n\n\ndef format_dict(d):\n    ret = {}\n    for key, val in d.items():\n        if isinstance(val, dict):\n            new_val = format_dict(val)\n        elif isinstance(val, list):\n            new_val = str(val)\n        else:\n            new_val = val\n        ret[key] = new_val\n    return ret\n"
  },
  {
    "path": "dvc/utils/flatten.py",
    "content": "def flatten(d):\n    import flatten_dict\n\n    return flatten_dict.flatten(d, reducer=\"dot\")\n\n\ndef unflatten(d):\n    import flatten_dict\n\n    return flatten_dict.unflatten(d, splitter=\"dot\")\n"
  },
  {
    "path": "dvc/utils/fs.py",
    "content": "import errno\nimport os\nimport shutil\nimport stat\nimport sys\nfrom typing import TYPE_CHECKING\n\nfrom dvc.exceptions import DvcException\nfrom dvc.log import logger\n\nif TYPE_CHECKING:\n    from dvc.types import StrPath\n\nlogger = logger.getChild(__name__)\n\n\nclass BasePathNotInCheckedPathException(DvcException):\n    def __init__(self, path, base_path):\n        msg = f\"Path: {path} does not overlap with base path: {base_path}\"\n        super().__init__(msg)\n\n\ndef contains_symlink_up_to(path: \"StrPath\", base_path: \"StrPath\"):\n    from dvc.fs import system\n\n    base_path = os.path.normcase(os.fspath(base_path))\n    path = os.path.normcase(os.fspath(path))\n\n    if base_path not in path:\n        raise BasePathNotInCheckedPathException(path, base_path)\n\n    if path == base_path:\n        return False\n    if system.is_symlink(path):\n        return True\n    if os.path.dirname(path) == path:\n        return False\n    return contains_symlink_up_to(os.path.dirname(path), base_path)\n\n\ndef _chmod(func, p, excinfo):  # noqa: ARG001\n    perm = os.lstat(p).st_mode\n    perm |= stat.S_IWRITE\n\n    try:\n        os.chmod(p, perm)\n    except OSError as exc:\n        # broken symlink or file is not owned by us\n        if exc.errno not in [errno.ENOENT, errno.EPERM]:\n            raise\n\n    func(p)\n\n\ndef _unlink(path, onerror):\n    try:\n        os.unlink(path)\n    except OSError:\n        onerror(os.unlink, path, sys.exc_info())\n\n\ndef remove(path):\n    logger.debug(\"Removing '%s'\", path)\n\n    try:\n        if os.path.isdir(path):\n            shutil.rmtree(path, onerror=_chmod)\n        else:\n            _unlink(path, _chmod)\n    except OSError as exc:\n        if exc.errno != errno.ENOENT:\n            raise\n\n\ndef path_isin(child: \"StrPath\", parent: \"StrPath\") -> bool:\n    \"\"\"Check if given `child` path is inside `parent`.\"\"\"\n\n    def normalize_path(path) -> str:\n        return 
os.path.normcase(os.path.normpath(path))\n\n    parent = os.path.join(normalize_path(parent), \"\")\n    child = normalize_path(child)\n    return child != parent and child.startswith(parent)\n"
  },
  {
    "path": "dvc/utils/humanize.py",
    "content": "from funcy import is_seq\n\n\ndef join(words):\n    words = list(words)\n    if not words:\n        return \"\"\n\n    return (\n        \"{before} and {after}\".format(before=\", \".join(words[:-1]), after=words[-1])\n        if len(words) > 1\n        else words[0]\n    )\n\n\ndef get_summary(stats):\n    status = (\n        (state, len(data) if is_seq(data) else data) for state, data in stats if data\n    )\n    return join(\n        \"{} file{} {}\".format(num, \"s\" if num > 1 else \"\", state)\n        for state, num in status\n    )\n\n\nELLIPSIS = \"…\"\n\n\ndef truncate_text(text: str, max_length: int, with_ellipsis: bool = True) -> str:\n    if with_ellipsis and len(text) > max_length:\n        return text[: max_length - 1] + ELLIPSIS\n\n    return text[:max_length]\n\n\ndef naturalsize(value: float, base: int = 1024) -> str:\n    from tqdm import tqdm\n\n    if value < base:\n        return f\"{value:.0f}\"\n    return tqdm.format_sizeof(value, divisor=base)\n"
  },
  {
    "path": "dvc/utils/hydra.py",
    "content": "from pathlib import Path\nfrom typing import TYPE_CHECKING, Optional\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.log import logger\n\nfrom .collections import merge_dicts, remove_missing_keys, to_omegaconf\n\nif TYPE_CHECKING:\n    from dvc.types import StrPath\n\n\nlogger = logger.getChild(__name__)\n\n\ndef load_hydra_plugins(plugins_path: str):\n    import sys\n\n    from hydra.core.plugins import Plugins\n\n    sys.path.append(plugins_path)\n    try:\n        Plugins.instance()\n    finally:\n        sys.path.remove(plugins_path)\n\n\ndef compose_and_dump(\n    output_file: \"StrPath\",\n    config_dir: Optional[str],\n    config_module: Optional[str],\n    config_name: str,\n    plugins_path: str,\n    overrides: list[str],\n) -> None:\n    \"\"\"Compose Hydra config and dumpt it to `output_file`.\n\n    Args:\n        output_file: File where the composed config will be dumped.\n        config_dir: Folder containing the Hydra config files.\n            Must be absolute file system path.\n        config_module: Module containing the Hydra config files.\n            Ignored if `config_dir` is not `None`.\n        config_name: Name of the config file containing defaults,\n            without the .yaml extension.\n        plugins_path: Path to auto discover Hydra plugins.\n        overrides: List of `Hydra Override`_ patterns.\n\n    .. 
_Hydra Override:\n        https://hydra.cc/docs/advanced/override_grammar/basic/\n    \"\"\"\n    from hydra import compose, initialize_config_dir, initialize_config_module\n    from omegaconf import OmegaConf\n\n    from .serialize import DUMPERS\n\n    config_source = config_dir or config_module\n    if not config_source:\n        raise ValueError(\"Either `config_dir` or `config_module` should be provided.\")\n    initialize_config = (\n        initialize_config_dir if config_dir else initialize_config_module\n    )\n\n    load_hydra_plugins(plugins_path)\n    with initialize_config(  # type: ignore[attr-defined]\n        config_source, version_base=None\n    ):\n        cfg = compose(config_name=config_name, overrides=overrides)\n\n    OmegaConf.resolve(cfg)\n\n    suffix = Path(output_file).suffix.lower()\n    if suffix not in [\".yml\", \".yaml\"]:\n        dumper = DUMPERS[suffix]\n        dumper(output_file, OmegaConf.to_object(cfg))\n    else:\n        Path(output_file).write_text(OmegaConf.to_yaml(cfg), encoding=\"utf-8\")\n    logger.trace(\n        \"Hydra composition enabled. Contents dumped to %s:\\n %s\", output_file, cfg\n    )\n\n\ndef apply_overrides(path: \"StrPath\", overrides: list[str]) -> None:\n    \"\"\"Update `path` params with the provided `Hydra Override`_ patterns.\n\n    Args:\n        overrides: List of `Hydra Override`_ patterns.\n\n    .. 
_Hydra Override:\n        https://hydra.cc/docs/next/advanced/override_grammar/basic/\n    \"\"\"\n    from hydra._internal.config_loader_impl import ConfigLoaderImpl\n    from hydra.errors import ConfigCompositionException, OverrideParseException\n    from omegaconf import OmegaConf\n\n    from .serialize import MODIFIERS\n\n    suffix = Path(path).suffix.lower()\n\n    hydra_errors = (ConfigCompositionException, OverrideParseException)\n\n    modify_data = MODIFIERS[suffix]\n    with modify_data(path) as original_data:\n        try:\n            parsed = to_hydra_overrides(overrides)\n\n            new_data = OmegaConf.create(\n                to_omegaconf(original_data),\n                flags={\"allow_objects\": True},\n            )\n            OmegaConf.set_struct(new_data, True)\n\n            ConfigLoaderImpl._apply_overrides_to_config(parsed, new_data)\n            new_data = OmegaConf.to_object(new_data)\n        except hydra_errors as e:\n            raise InvalidArgumentError(\"Invalid `--set-param` value\") from e\n\n        assert isinstance(new_data, dict)\n        merge_dicts(original_data, new_data)\n        remove_missing_keys(original_data, new_data)\n\n\ndef to_hydra_overrides(path_overrides):\n    from hydra.core.override_parser.overrides_parser import OverridesParser\n\n    parser = OverridesParser.create()\n    return parser.parse_overrides(overrides=path_overrides)\n\n\ndef dict_product(dicts):\n    import itertools\n\n    return [dict(zip(dicts, x)) for x in itertools.product(*dicts.values())]\n\n\ndef get_hydra_sweeps(path_overrides):\n    from hydra._internal.core_plugins.basic_sweeper import BasicSweeper\n    from hydra.core.override_parser.types import ValueType\n\n    path_sweeps = {}\n    for path, overrides in path_overrides.items():\n        overrides = to_hydra_overrides(overrides)\n        for override in overrides:\n            if override.value_type == ValueType.GLOB_CHOICE_SWEEP:\n                raise InvalidArgumentError(\n  
                  f\"Glob override '{override.input_line}' is not supported.\"\n                )\n        path_sweeps[path] = BasicSweeper.split_arguments(overrides, None)[0]\n    return dict_product(path_sweeps)\n"
  },
  {
    "path": "dvc/utils/objects.py",
    "content": "from typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n    from functools import cached_property\nelse:\n    from funcy import cached_property  # noqa: TID251\n\n__all__ = [\"cached_property\"]\n"
  },
  {
    "path": "dvc/utils/packaging.py",
    "content": "import logging\n\nfrom funcy import once_per_args\n\nfrom dvc.log import logger\n\nlogger = logger.getChild(__name__)\n\n\n@once_per_args\ndef check_required_version(pkg: str, dist: str = \"dvc\", log_level=logging.WARNING):\n    from importlib import metadata\n\n    from packaging.requirements import InvalidRequirement, Requirement\n\n    try:\n        reqs = {\n            r.name: r.specifier for r in map(Requirement, metadata.requires(dist) or [])\n        }\n        version = metadata.version(pkg)\n    except (metadata.PackageNotFoundError, InvalidRequirement):\n        return\n\n    specifier = reqs.get(pkg)\n    if specifier and version and version not in specifier:\n        logger.log(\n            log_level,\n            \"%s%s is required, but you have %r installed which is incompatible.\",\n            pkg,\n            specifier,\n            version,\n        )\n"
  },
  {
    "path": "dvc/utils/plots.py",
    "content": "from collections import defaultdict\n\n\ndef get_plot_id(config_plot_id: str, config_file_path: str = \"\"):\n    return (\n        f\"{config_file_path}::{config_plot_id}\" if config_file_path else config_plot_id\n    )\n\n\ndef group_definitions_by_id(\n    definitions: dict[str, dict],\n) -> dict[str, tuple[str, dict]]:\n    \"\"\"\n    Format ID and extracts plot_definition for each plot.\n\n    Arguments:\n        definitions: dict of {config_file: config_file_content}.\n\n    Returns:\n        Dict of {plot_id: (original_plot_id, plot_definition)}.\n    \"\"\"\n    groups_by_config: dict = defaultdict(dict)\n    groups_by_id: dict = {}\n    for config_file, config_file_content in definitions.items():\n        for plot_id, plot_definition in config_file_content.get(\"data\", {}).items():\n            groups_by_config[plot_id][config_file] = (plot_id, plot_definition)\n    # only keep config_file if the same plot_id is in multiple config_files\n    for plot_id, configs in groups_by_config.items():\n        if len(configs) == 1:\n            groups_by_id[plot_id] = next(iter(configs.values()))\n        else:\n            for config_file, content in configs.items():\n                full_id = get_plot_id(plot_id, config_file)\n                groups_by_id[full_id] = content\n    return groups_by_id\n"
  },
  {
    "path": "dvc/utils/serialize/__init__.py",
    "content": "from collections import defaultdict\n\nfrom ._common import *  # noqa: F403\nfrom ._json import *  # noqa: F403\nfrom ._py import *  # noqa: F403\nfrom ._toml import *  # noqa: F403\nfrom ._yaml import *  # noqa: F403\n\nLOADERS: defaultdict[str, LoaderFn] = defaultdict(  # noqa: F405\n    lambda: load_yaml  # noqa: F405\n)\nLOADERS.update({\".toml\": load_toml, \".json\": load_json, \".py\": load_py})  # noqa: F405\n\nPARSERS: defaultdict[str, ParserFn] = defaultdict(  # noqa: F405\n    lambda: parse_yaml  # noqa: F405\n)\nPARSERS.update(\n    {\".toml\": parse_toml, \".json\": parse_json, \".py\": parse_py}  # noqa: F405\n)\n\n\ndef load_path(fs_path, fs, **kwargs):\n    suffix = fs.suffix(fs_path).lower()\n    loader = LOADERS[suffix]\n    return loader(fs_path, fs=fs, **kwargs)\n\n\nDUMPERS: defaultdict[str, DumperFn] = defaultdict(  # noqa: F405\n    lambda: dump_yaml  # noqa: F405\n)\nDUMPERS.update({\".toml\": dump_toml, \".json\": dump_json, \".py\": dump_py})  # noqa: F405\n\nMODIFIERS: defaultdict[str, ModifierFn] = defaultdict(  # noqa: F405\n    lambda: modify_yaml  # noqa: F405\n)\nMODIFIERS.update(\n    {\n        \".toml\": modify_toml,  # noqa: F405\n        \".json\": modify_json,  # noqa: F405\n        \".py\": modify_py,  # noqa: F405\n    }\n)\n"
  },
  {
    "path": "dvc/utils/serialize/_common.py",
    "content": "\"\"\"Common utilities for serialize.\"\"\"\n\nimport os\nfrom contextlib import AbstractContextManager, contextmanager\nfrom typing import TYPE_CHECKING, Any, Callable, Optional, Protocol, TextIO, Union\n\nfrom funcy import reraise\n\nfrom dvc.exceptions import DvcException\n\nif TYPE_CHECKING:\n    from dvc.fs import FileSystem\n    from dvc.types import StrPath\n\n\nclass DumperFn(Protocol):\n    def __call__(\n        self, path: \"StrPath\", data: Any, fs: Optional[\"FileSystem\"] = None\n    ) -> Any: ...\n\n\nclass DumpersFn(Protocol):\n    def __call__(self, data: Any, stream: TextIO) -> Any: ...\n\n\nclass ModifierFn(Protocol):\n    def __call__(\n        self, path: \"StrPath\", fs: Optional[\"FileSystem\"] = None\n    ) -> AbstractContextManager[dict]: ...\n\n\nclass LoaderFn(Protocol):\n    def __call__(self, path: \"StrPath\", fs: Optional[\"FileSystem\"] = None) -> Any: ...\n\n\nReadType = Union[bytes, str, None]\nParserFn = Callable[[ReadType, \"StrPath\"], dict]\n\n\nclass ParseError(DvcException):\n    \"\"\"Errors while parsing files\"\"\"\n\n    def __init__(self, path: \"StrPath\", message: str):\n        from dvc.utils import relpath\n\n        path = relpath(path)\n        self.path = path\n        super().__init__(f\"unable to read: '{path}', {message}\")\n\n\nclass EncodingError(ParseError):\n    \"\"\"We could not read a file with the given encoding\"\"\"\n\n    def __init__(self, path: \"StrPath\", encoding: str):\n        self.encoding = encoding\n        super().__init__(path, f\"is not valid {encoding}\")\n\n\ndef _load_data(\n    path: \"StrPath\", parser: ParserFn, fs: Optional[\"FileSystem\"] = None, **kwargs\n):\n    open_fn = fs.open if fs else open\n    encoding = \"utf-8\"\n    with open_fn(path, encoding=encoding, **kwargs) as fd:  # type: ignore[arg-type]\n        with reraise(UnicodeDecodeError, EncodingError(path, encoding)):\n            return parser(fd.read(), path)\n\n\ndef _dump_data(\n    path,\n    
data: Any,\n    dumper: DumpersFn,\n    fs: Optional[\"FileSystem\"] = None,\n    **dumper_args,\n):\n    open_fn = fs.open if fs else open\n    with open_fn(path, \"w+\", encoding=\"utf-8\") as fd:  # type: ignore[call-overload]\n        dumper(data, fd, **dumper_args)\n\n\n@contextmanager\ndef _modify_data(\n    path: \"StrPath\",\n    parser: ParserFn,\n    dumper: DumpersFn,\n    fs: Optional[\"FileSystem\"] = None,\n):\n    file_exists = fs.exists(os.fspath(path)) if fs else os.path.exists(path)\n    data = _load_data(path, parser=parser, fs=fs) if file_exists else {}\n    yield data\n    _dump_data(path, data, dumper=dumper, fs=fs)\n"
  },
  {
    "path": "dvc/utils/serialize/_json.py",
    "content": "import json\n\nfrom funcy import contextmanager, reraise\n\nfrom ._common import ParseError, _dump_data, _load_data, _modify_data\n\n\nclass JSONFileCorruptedError(ParseError):\n    def __init__(self, path):\n        super().__init__(path, \"JSON file structure is corrupted\")\n\n\ndef load_json(path, fs=None, **kwargs):\n    return _load_data(path, parser=parse_json, fs=fs, **kwargs)\n\n\ndef parse_json(text, path, **kwargs):\n    with reraise(json.JSONDecodeError, JSONFileCorruptedError(path)):\n        return json.loads(text, **kwargs) or {}\n\n\ndef _dump_json(data, stream, **kwargs):\n    return json.dump(data, stream, **kwargs)\n\n\ndef dump_json(path, data, fs=None, **kwargs):\n    return _dump_data(path, data, dumper=_dump_json, fs=fs, **kwargs)\n\n\n@contextmanager\ndef modify_json(path, fs=None):\n    with _modify_data(path, parse_json, _dump_json, fs=fs) as d:\n        yield d\n\n\ndef encode_exception(o):\n    if isinstance(o, Exception):\n        return {\"type\": type(o).__name__, \"msg\": str(o)}\n    raise TypeError\n"
  },
  {
    "path": "dvc/utils/serialize/_py.py",
    "content": "import ast\nfrom contextlib import contextmanager\nfrom typing import Any\n\nfrom funcy import reraise\n\nfrom ._common import ParseError, _dump_data, _load_data, _modify_data\n\n_PARAMS_KEY = \"__params_old_key_for_update__\"\n_PARAMS_TEXT_KEY = \"__params_text_key_for_update__\"\n\n\nclass PythonFileCorruptedError(ParseError):\n    def __init__(self, path, message=\"Python file structure is corrupted\"):\n        super().__init__(path, message)\n\n\ndef load_py(path, fs=None, **kwargs):\n    return _load_data(path, parser=parse_py, fs=fs, **kwargs)\n\n\ndef parse_py(text, path):\n    \"\"\"Parses text from .py file into Python structure.\"\"\"\n    with reraise(SyntaxError, PythonFileCorruptedError(path)):\n        tree = ast.parse(text, filename=path)\n\n    return _ast_tree_to_dict(tree)\n\n\ndef parse_py_for_update(text, path):\n    \"\"\"Parses text into dict for update params.\"\"\"\n    with reraise(SyntaxError, PythonFileCorruptedError(path)):\n        tree = ast.parse(text, filename=path)\n\n    result = _ast_tree_to_dict(tree)\n    result.update({_PARAMS_KEY: _ast_tree_to_dict(tree, lineno=True)})\n    result.update({_PARAMS_TEXT_KEY: text})\n    return result\n\n\ndef _dump(data, stream):\n    old_params = data[_PARAMS_KEY]\n    new_params = {\n        key: value\n        for key, value in data.items()\n        if key not in [_PARAMS_KEY, _PARAMS_TEXT_KEY]\n    }\n    old_lines = data[_PARAMS_TEXT_KEY].splitlines(True)\n\n    def _update_lines(lines, old_dct, new_dct):\n        for key, value in new_dct.items():\n            if isinstance(value, dict):\n                lines = _update_lines(lines, old_dct[key], value)\n            elif value != old_dct[key][\"value\"]:\n                lineno = old_dct[key][\"lineno\"]\n                lines[lineno] = lines[lineno].replace(\n                    f\" = {old_dct[key]['value']}\", f\" = {value}\"\n                )\n            else:\n                continue\n        return lines\n\n    
new_lines = _update_lines(old_lines, old_params, new_params)\n    new_text = \"\".join(new_lines)\n\n    try:\n        ast.parse(new_text)\n    except SyntaxError:\n        raise PythonFileCorruptedError(  # noqa: B904\n            stream.name,\n            \"Python file structure is corrupted after update params\",\n        )\n\n    stream.write(new_text)\n    stream.close()\n\n\ndef dump_py(path, data, fs=None):\n    return _dump_data(path, data, dumper=_dump, fs=fs)\n\n\n@contextmanager\ndef modify_py(path, fs=None):\n    with _modify_data(path, parse_py_for_update, _dump, fs=fs) as d:\n        yield d\n\n\ndef _ast_tree_to_dict(tree, only_self_params=False, lineno=False):\n    \"\"\"Parses ast trees to dict.\n\n    :param tree: ast.Tree\n    :param only_self_params: get only self params from class __init__ function\n    :param lineno: add params line number (needed for update)\n    :return:\n    \"\"\"\n    result = {}\n    for _body in tree.body:\n        try:\n            if isinstance(_body, (ast.Assign, ast.AnnAssign)):\n                result.update(_ast_assign_to_dict(_body, only_self_params, lineno))\n            elif isinstance(_body, ast.ClassDef):\n                result.update({_body.name: _ast_tree_to_dict(_body, lineno=lineno)})\n            elif isinstance(_body, ast.FunctionDef) and _body.name == \"__init__\":\n                result.update(\n                    _ast_tree_to_dict(_body, only_self_params=True, lineno=lineno)\n                )\n        except ValueError:\n            continue\n        except AttributeError:\n            continue\n    return result\n\n\ndef _ast_assign_to_dict(assign, only_self_params=False, lineno=False):  # noqa: PLR0912\n    result = {}\n\n    if isinstance(assign, ast.AnnAssign):\n        name = _get_ast_name(assign.target, only_self_params)\n    elif len(assign.targets) == 1:\n        name = _get_ast_name(assign.targets[0], only_self_params)\n    else:\n        raise AttributeError\n\n    value: Any\n    if 
isinstance(assign.value, ast.Dict):\n        value = {}\n        for key, val in zip(assign.value.keys, assign.value.values):\n            if lineno:\n                value[ast.literal_eval(key)] = {  # type: ignore[arg-type]\n                    \"lineno\": assign.lineno - 1,\n                    \"value\": ast.literal_eval(val),\n                }\n            else:\n                v = ast.literal_eval(val)\n                value[ast.literal_eval(key)] = v  # type: ignore[arg-type]\n    elif isinstance(assign.value, ast.List):\n        value = [ast.literal_eval(val) for val in assign.value.elts]\n    elif isinstance(assign.value, ast.Set):\n        values = [ast.literal_eval(val) for val in assign.value.elts]\n        value = set(values)\n    elif isinstance(assign.value, ast.Tuple):\n        values = [ast.literal_eval(val) for val in assign.value.elts]\n        value = tuple(values)\n    else:\n        value = ast.literal_eval(assign.value)\n\n    if lineno and not isinstance(assign.value, ast.Dict):\n        result[name] = {\"lineno\": assign.lineno - 1, \"value\": value}\n    else:\n        result[name] = value\n\n    return result\n\n\ndef _get_ast_name(target, only_self_params=False):\n    if hasattr(target, \"id\") and not only_self_params:\n        return target.id\n    if hasattr(target, \"attr\") and target.value.id == \"self\":\n        return target.attr\n    raise AttributeError\n"
  },
  {
    "path": "dvc/utils/serialize/_toml.py",
    "content": "from contextlib import contextmanager\n\nfrom funcy import reraise\n\nfrom ._common import ParseError, _dump_data, _load_data, _modify_data\n\n\nclass TOMLFileCorruptedError(ParseError):\n    def __init__(self, path):\n        super().__init__(path, \"TOML file structure is corrupted\")\n\n\ndef load_toml(path, fs=None, **kwargs):\n    return _load_data(path, parser=parse_toml, fs=fs, **kwargs)\n\n\ndef _parse_toml(text, path):\n    from tomlkit import loads\n    from tomlkit.exceptions import ParseError as TomlkitParseError\n\n    with reraise(TomlkitParseError, TOMLFileCorruptedError(path)):\n        return loads(text)\n\n\ndef parse_toml(text, path, preserve_comments=False):\n    rval = _parse_toml(text, path)\n\n    if preserve_comments:\n        return rval\n\n    return rval.unwrap()\n\n\ndef parse_toml_for_update(text, path):\n    return parse_toml(text, path, preserve_comments=True)\n\n\ndef _dump(data, stream, sort_keys=False):\n    import tomlkit\n\n    return tomlkit.dump(data, stream, sort_keys=sort_keys)\n\n\ndef dump_toml(path, data, fs=None, **kwargs):\n    return _dump_data(path, data, dumper=_dump, fs=fs, **kwargs)\n\n\n@contextmanager\ndef modify_toml(path, fs=None):\n    with _modify_data(path, parse_toml_for_update, _dump, fs=fs) as d:\n        yield d\n"
  },
  {
    "path": "dvc/utils/serialize/_yaml.py",
    "content": "import io\nfrom collections import OrderedDict\nfrom contextlib import contextmanager\nfrom typing import Any, TextIO\n\nfrom funcy import reraise\n\nfrom ._common import ParseError, _dump_data, _load_data, _modify_data\n\n\nclass YAMLError(ParseError):\n    pass\n\n\nclass YAMLFileCorruptedError(YAMLError):\n    def __init__(self, path):\n        super().__init__(path, \"YAML file structure is corrupted\")\n\n\ndef load_yaml(path, fs=None, **kwargs):\n    return _load_data(path, parser=parse_yaml, fs=fs, **kwargs)\n\n\ndef parse_yaml(text, path, typ=\"safe\"):\n    from ruamel.yaml import YAML\n    from ruamel.yaml import YAMLError as _YAMLError\n\n    yaml = YAML(typ=typ)\n    with reraise(_YAMLError, YAMLFileCorruptedError(path)):\n        return yaml.load(text) or {}\n\n\ndef parse_yaml_for_update(text, path):\n    \"\"\"Parses text into Python structure.\n\n    Unlike `parse_yaml()` this returns ordered dicts, values have special\n    attributes to store comments and line breaks. 
This allows us to preserve\n    all of those upon dump.\n\n    This one is, however, several times slower than simple `parse_yaml()`.\n    \"\"\"\n    return parse_yaml(text, path, typ=\"rt\")\n\n\ndef _get_yaml():\n    from ruamel.yaml import YAML\n\n    yaml = YAML()\n    yaml.default_flow_style = False\n\n    # tell Dumper to represent OrderedDict as normal dict\n    yaml_repr_cls = yaml.Representer\n    yaml_repr_cls.add_representer(OrderedDict, yaml_repr_cls.represent_dict)\n    return yaml\n\n\ndef _dump(data: Any, stream: TextIO) -> Any:\n    yaml = _get_yaml()\n    return yaml.dump(data, stream)\n\n\ndef dump_yaml(path, data, fs=None, **kwargs):\n    return _dump_data(path, data, dumper=_dump, fs=fs, **kwargs)\n\n\ndef loads_yaml(s, typ=\"safe\"):\n    from ruamel.yaml import YAML\n\n    return YAML(typ=typ).load(s)\n\n\ndef dumps_yaml(d):\n    stream = io.StringIO()\n    _dump(d, stream)\n    return stream.getvalue()\n\n\n@contextmanager\ndef modify_yaml(path, fs=None):\n    with _modify_data(path, parse_yaml_for_update, _dump, fs=fs) as d:\n        yield d\n"
  },
  {
    "path": "dvc/utils/strictyaml.py",
    "content": "\"\"\"\nThis module combines schema and yaml parser into one, to provide better error\nmessages through a single entrypoint `load`.\n\nUsed for parsing dvc.yaml, dvc.lock and .dvc files.\n\nNot to be confused with strictyaml, a python library with similar motivations.\n\"\"\"\n\nimport re\nimport typing\nfrom contextlib import suppress\nfrom typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar\n\nfrom dvc.exceptions import PrettyDvcException\nfrom dvc.ui import ui\nfrom dvc.utils.serialize import (\n    EncodingError,\n    YAMLFileCorruptedError,\n    parse_yaml,\n    parse_yaml_for_update,\n)\nfrom dvc_objects.fs.local import LocalFileSystem\n\nif TYPE_CHECKING:\n    from rich.syntax import Syntax\n    from ruamel.yaml import StreamMark\n    from voluptuous import MultipleInvalid\n\n    from dvc.fs import FileSystem\n    from dvc.ui import RichText\n\n\n_T = TypeVar(\"_T\")\nmerge_conflict_marker = re.compile(\"^([<=>]{7}) .*$\", re.MULTILINE)\n\n\ndef make_relpath(fs_path: str, fs: \"Optional[FileSystem]\" = None) -> str:\n    from os import curdir, pardir, sep\n\n    from dvc.utils import relpath\n\n    if not fs_path:\n        return fs_path\n\n    if fs and not isinstance(fs, LocalFileSystem):\n        rel = fs.relpath(fs_path).replace(fs.sep, sep)\n    else:\n        rel = relpath(fs_path)\n\n    if rel.startswith(sep):\n        # if it is an absolute path, return as it is.\n        # This can happen on Windows when we are on a different drive.\n        return rel\n    prefix = curdir + sep if not rel.startswith(pardir) else \"\"\n    return prefix + rel\n\n\ndef _prepare_message(message: str) -> \"RichText\":\n    return ui.rich_text(message, style=\"red\")\n\n\ndef _prepare_cause(cause: str) -> \"RichText\":\n    return ui.rich_text(cause, style=\"bold\")\n\n\ndef _prepare_code_snippets(code: str, start_line: int = 1, **kwargs: Any) -> \"Syntax\":\n    from rich.syntax import Syntax\n\n    kwargs.setdefault(\"start_line\", 
start_line)\n    return Syntax(\n        code,\n        \"yaml\",\n        theme=\"ansi_dark\",\n        word_wrap=True,\n        line_numbers=True,\n        indent_guides=True,\n        **kwargs,\n    )\n\n\nclass YAMLSyntaxError(PrettyDvcException, YAMLFileCorruptedError):\n    def __init__(\n        self,\n        path: str,\n        yaml_text: str,\n        exc: Exception,\n        rev: Optional[str] = None,\n    ) -> None:\n        self.yaml_text: str = yaml_text\n        self.exc: Exception = exc\n\n        merge_conflicts = merge_conflict_marker.search(self.yaml_text)\n        self.hint = \" (possible merge conflicts)\" if merge_conflicts else \"\"\n        self.rev: Optional[str] = rev\n        super().__init__(path)\n        self.relpath: str = path\n\n    def __pretty_exc__(self, **kwargs: Any) -> None:  # noqa: C901\n        from ruamel.yaml.error import MarkedYAMLError\n\n        exc = self.exc.__cause__\n\n        if not isinstance(exc, MarkedYAMLError):\n            raise ValueError(\"nothing to pretty-print here.\")  # noqa: TRY004\n\n        source = self.yaml_text.splitlines()\n\n        def prepare_linecol(mark: \"StreamMark\") -> str:\n            return f\"in line {mark.line + 1}, column {mark.column + 1}\"\n\n        def prepare_message(\n            message: str, mark: Optional[\"StreamMark\"] = None\n        ) -> \"RichText\":\n            cause = \", \".join(\n                [message.capitalize(), prepare_linecol(mark) if mark else \"\"]\n            )\n            return _prepare_cause(cause)\n\n        def prepare_code(mark: \"StreamMark\") -> \"Syntax\":\n            line = mark.line + 1\n            code = \"\" if line > len(source) else source[line - 1]\n            return _prepare_code_snippets(code, line)\n\n        lines: list[object] = []\n        if hasattr(exc, \"context\"):\n            if exc.context_mark is not None:\n                lines.append(prepare_message(str(exc.context), exc.context_mark))\n            if 
exc.context_mark is not None and (\n                exc.problem is None\n                or exc.problem_mark is None\n                or exc.context_mark.name != exc.problem_mark.name\n                or exc.context_mark.line != exc.problem_mark.line\n                or exc.context_mark.column != exc.problem_mark.column\n            ):\n                lines.extend([prepare_code(exc.context_mark), \"\"])\n            if exc.problem is not None:\n                lines.append(prepare_message(str(exc.problem), exc.problem_mark))\n            if exc.problem_mark is not None:\n                lines.append(prepare_code(exc.problem_mark))\n\n        if lines:\n            # we should not add a newline after the main message\n            # if there are no other outputs\n            lines.insert(0, \"\")\n\n        rev_msg = f\" in revision '{self.rev[:7]}'\" if self.rev else \"\"\n        msg_fmt = f\"'{self.relpath}' is invalid{self.hint}{rev_msg}.\"\n        lines.insert(0, _prepare_message(msg_fmt))\n        for line in lines:\n            ui.error_write(line, styled=True)\n\n\ndef determine_linecol(\n    data, paths, max_steps=5\n) -> tuple[Optional[int], Optional[int], int]:\n    \"\"\"Determine linecol from the CommentedMap for the `paths` location.\n\n    CommentedMap from `ruamel.yaml` has `.lc` property from which we can read\n    `.line` and `.col`. This is available in the collections type,\n    i.e. list and dictionaries.\n\n    But this may fail on non-collection types. For example, if the `paths` is\n    ['stages', 'metrics'], metrics being a boolean type does not have `lc`\n    prop.\n    ```\n    stages:\n      metrics: true\n    ```\n\n    To provide some context to the user, we step up to the\n    path ['stages'], which being a collection type, will have `lc` prop\n    with which we can find line and col.\n\n    This may end up being not accurate, so we try to show the same amount of\n    lines of code for `n` number of steps taken upwards. 
In a worst case,\n    it may be just 1 step (as non-collection item cannot have child items),\n    but `schema validator` may provide us arbitrary path. So, this caps the\n    number of steps upward to just 5. If it does not find any linecols, it'll\n    abort.\n    \"\"\"\n    from dpath import get\n\n    step = 1\n    line, col = None, None\n    while paths and step < max_steps:\n        value = get(data, paths, default=None)\n        if value is not None:\n            with suppress(AttributeError, TypeError):\n                line = value.lc.line + 1  # type: ignore[attr-defined]\n                col = value.lc.col + 1  # type: ignore[attr-defined]\n                break\n        step += 1\n        *paths, _ = paths\n\n    return line, col, step\n\n\nclass YAMLValidationError(PrettyDvcException):\n    def __init__(\n        self,\n        exc: \"MultipleInvalid\",\n        path: Optional[str] = None,\n        text: Optional[str] = None,\n        rev: Optional[str] = None,\n    ) -> None:\n        self.text = text or \"\"\n        self.exc = exc\n\n        self.path = path or \"\"\n\n        message = f\"'{self.path}' validation failed\"\n        message += f\" in revision '{rev[:7]}'\" if rev else \"\"\n        if len(self.exc.errors) > 1:\n            message += f\": {len(self.exc.errors)} errors\"\n        super().__init__(f\"{message}\")\n\n    def _prepare_context(self, data: typing.Mapping) -> list[object]:\n        lines: list[object] = []\n        for index, error in enumerate(self.exc.errors):\n            if index and lines[-1]:\n                lines.append(\"\")\n            line, col, step = determine_linecol(data, error.path)\n            parts = [error.error_message]\n            if error.path:\n                parts.append(\"in \" + \" -> \".join(str(p) for p in error.path))\n            if line:\n                parts.append(f\"line {line}\")\n            if col:\n                parts.append(f\"column {col}\")\n            
lines.append(_prepare_cause(\", \".join(parts)))\n\n            if line:\n                # we show one line above the error\n                # we try to show few more lines if we could not\n                # reliably figure out where the error was\n                lr = (line - 1, line + step - 1)\n                code = _prepare_code_snippets(self.text, line_range=lr)\n                lines.append(code)\n        return lines\n\n    def __pretty_exc__(self, **kwargs: Any) -> None:\n        \"\"\"Prettify exception message.\"\"\"\n        from collections.abc import Mapping\n\n        lines: list[object] = []\n        data = parse_yaml_for_update(self.text, self.path)\n        if isinstance(data, Mapping):\n            lines.extend(self._prepare_context(data))\n\n        cause = \"\"\n        if lines:\n            # we should not add a newline after the main message\n            # if there are no other outputs\n            lines.insert(0, \"\")\n        else:\n            # if we don't have any context to show, we'll fallback to what we\n            # got from voluptuous and print them in the same line.\n            cause = f\": {self.exc}\"\n\n        lines.insert(0, _prepare_message(f\"{self}{cause}.\"))\n        for line in lines:\n            ui.error_write(line, styled=True)\n\n\ndef validate(\n    data: _T,\n    schema: Callable[[_T], _T],\n    text: Optional[str] = None,\n    path: Optional[str] = None,\n    rev: Optional[str] = None,\n) -> _T:\n    from voluptuous import MultipleInvalid\n\n    try:\n        return schema(data)\n    except MultipleInvalid as exc:\n        raise YAMLValidationError(exc, path, text, rev=rev) from exc\n\n\ndef load(\n    path: str,\n    schema: Optional[Callable[[_T], _T]] = None,\n    fs: Optional[\"FileSystem\"] = None,\n    encoding: str = \"utf-8\",\n    round_trip: bool = False,\n) -> Any:\n    open_fn = fs.open if fs else open\n    rev = getattr(fs, \"rev\", None)\n\n    try:\n        with open_fn(path, encoding=encoding) 
as fd:  # type: ignore[operator]\n            text = fd.read()\n        data = parse_yaml(text, path, typ=\"rt\" if round_trip else \"safe\")\n    except UnicodeDecodeError as exc:\n        raise EncodingError(path, encoding) from exc\n    except YAMLFileCorruptedError as exc:\n        cause = exc.__cause__\n        relpath = make_relpath(path, fs)\n        raise YAMLSyntaxError(relpath, text, exc, rev=rev) from cause\n\n    if schema:\n        relpath = make_relpath(path, fs)\n        # not returning validated data, as it may remove\n        # details from CommentedMap that we get from roundtrip parser\n        validate(data, schema, text=text, path=relpath, rev=rev)\n    return data, text\n"
  },
  {
    "path": "dvc/utils/studio.py",
    "content": "import os\nfrom typing import TYPE_CHECKING, Any, Optional\nfrom urllib.parse import urljoin\n\nimport requests\nfrom funcy import compact, ignore\nfrom requests.adapters import HTTPAdapter\n\nfrom dvc.env import (\n    DVC_STUDIO_OFFLINE,\n    DVC_STUDIO_REPO_URL,\n    DVC_STUDIO_TOKEN,\n    DVC_STUDIO_URL,\n)\nfrom dvc.log import logger\nfrom dvc.utils import as_posix\n\nif TYPE_CHECKING:\n    from requests import Response\n\n    from dvc.repo import Repo\n\n\nlogger = logger.getChild(__name__)\n\nSTUDIO_URL = \"https://studio.datachain.ai\"\n\n\ndef post(\n    url: str,\n    token: str,\n    data: dict[str, Any],\n    base_url: Optional[str] = STUDIO_URL,\n    max_retries: int = 3,\n    timeout: int = 5,\n) -> \"Response\":\n    url = urljoin(base_url or STUDIO_URL, url)\n    session = requests.Session()\n    session.mount(url, HTTPAdapter(max_retries=max_retries))\n\n    logger.trace(\"Sending %s to %s\", data, url)\n\n    headers = {\"Authorization\": f\"token {token}\"}\n    r = session.post(\n        url, json=data, headers=headers, timeout=timeout, allow_redirects=False\n    )\n    r.raise_for_status()\n    return r\n\n\ndef notify_refs(\n    repo_url: str,\n    token: str,\n    *,\n    base_url: Optional[str] = STUDIO_URL,\n    **refs: list[str],\n) -> dict[str, Any]:\n    extra_keys = refs.keys() - {\"pushed\", \"removed\"}\n    assert not extra_keys, f\"got extra args: {extra_keys}\"\n\n    refs = compact(refs)\n    if not refs:\n        return {}\n\n    logger.debug(\n        \"notifying Studio%s about updated experiments\",\n        f\" ({base_url})\" if base_url else \"\",\n    )\n    data = {\"repo_url\": repo_url, \"client\": \"dvc\", \"refs\": refs}\n\n    try:\n        r = post(\"webhook/dvc\", token, data, base_url=base_url)\n    except requests.RequestException as e:\n        logger.trace(\"\", exc_info=True)\n\n        msg = str(e)\n        if e.response is None:\n            logger.warning(\"failed to notify Studio: %s\", 
msg.lower())\n            return {}\n\n        r = e.response\n        d = ignore(Exception, default={})(r.json)()\n        status = r.status_code\n        if detail := d.get(\"detail\"):\n            msg = f\"{detail} ({status=})\"\n        logger.warning(\"failed to notify Studio: %s\", msg.lower())\n    else:\n        d = r.json()\n\n    if d:\n        logger.trace(\"received response: %s (status=%r)\", d, r.status_code)\n    return d\n\n\ndef config_to_env(config: dict[str, Any]) -> dict[str, Any]:\n    env = {}\n    if \"offline\" in config:\n        env[DVC_STUDIO_OFFLINE] = config[\"offline\"]\n    if \"repo_url\" in config:\n        env[DVC_STUDIO_REPO_URL] = config[\"repo_url\"]\n    if \"token\" in config:\n        env[DVC_STUDIO_TOKEN] = config[\"token\"]\n    if \"url\" in config:\n        env[DVC_STUDIO_URL] = config[\"url\"]\n    return env\n\n\ndef env_to_config(env: dict[str, Any]) -> dict[str, Any]:\n    config = {}\n    if DVC_STUDIO_OFFLINE in env:\n        config[\"offline\"] = env[DVC_STUDIO_OFFLINE]\n    if DVC_STUDIO_REPO_URL in env:\n        config[\"repo_url\"] = env[DVC_STUDIO_REPO_URL]\n    if DVC_STUDIO_TOKEN in env:\n        config[\"token\"] = env[DVC_STUDIO_TOKEN]\n    if DVC_STUDIO_URL in env:\n        config[\"url\"] = env[DVC_STUDIO_URL]\n    return config\n\n\ndef get_subrepo_relpath(repo: \"Repo\") -> str:\n    from dvc.fs import GitFileSystem\n\n    scm_root_dir = \"/\" if isinstance(repo.fs, GitFileSystem) else repo.scm.root_dir\n\n    relpath = as_posix(repo.fs.relpath(repo.root_dir, scm_root_dir))\n\n    return \"\" if relpath == \".\" else relpath\n\n\ndef get_repo_url(repo: \"Repo\") -> str:\n    from dulwich.porcelain import get_remote_repo\n\n    from dvc.env import DVC_EXP_GIT_REMOTE\n\n    repo_url = os.getenv(\n        DVC_EXP_GIT_REMOTE, repo.config.get(\"exp\", {}).get(\"git_remote\")\n    )\n    if repo_url:\n        try:\n            _, repo_url = get_remote_repo(repo.scm.dulwich.repo, repo_url)\n        except 
IndexError:\n            pass\n    return repo_url\n"
  },
  {
    "path": "dvc/utils/table.py",
    "content": "from typing import TYPE_CHECKING, Any\n\nfrom rich.table import Table as RichTable\n\nif TYPE_CHECKING:\n    from rich.console import Console, ConsoleOptions\n\n\nclass Table(RichTable):\n    def add_column(self, *args: Any, collapse: bool = False, **kwargs: Any) -> None:\n        super().add_column(*args, **kwargs)\n        self.columns[-1].collapse = collapse  # type: ignore[attr-defined]\n\n    def _calculate_column_widths(\n        self, console: \"Console\", options: \"ConsoleOptions\"\n    ) -> list[int]:\n        \"\"\"Calculate the widths of each column, including padding, not\n        including borders.\n\n        Adjacent collapsed columns will be removed until there is only a single\n        truncated column remaining.\n        \"\"\"\n        widths = super()._calculate_column_widths(console, options)\n        last_collapsed = -1\n        columns = self.columns\n        for i in range(len(columns) - 1, -1, -1):\n            if widths[i] == 0 and columns[i].collapse:  # type: ignore[attr-defined]\n                if last_collapsed >= 0:\n                    del widths[last_collapsed]\n                    del columns[last_collapsed]\n                    if self.box:\n                        options.max_width += 1\n                    for column in columns[last_collapsed:]:\n                        column._index -= 1\n                last_collapsed = i\n                padding = self._get_padding_width(i)\n                if (\n                    columns[i].overflow == \"ellipsis\"\n                    and (sum(widths) + padding) <= options.max_width\n                ):\n                    # Set content width to 1 (plus padding) if we can fit a\n                    # single unicode ellipsis in this column\n                    widths[i] = 1 + padding\n            else:\n                last_collapsed = -1\n        return widths\n\n    def _collapse_widths(  # type: ignore[override]\n        self,\n        widths: list[int],\n        
wrapable: list[bool],\n        max_width: int,\n    ) -> list[int]:\n        \"\"\"Collapse columns right-to-left if possible to fit table into\n        max_width.\n\n        If table is still too wide after collapsing, rich's automatic overflow\n        handling will be used.\n        \"\"\"\n        collapsible = [column.collapse for column in self.columns]  # type: ignore[attr-defined]\n        total_width = sum(widths)\n        excess_width = total_width - max_width\n        if any(collapsible):\n            for i in range(len(widths) - 1, -1, -1):\n                if collapsible[i]:\n                    excess_width -= widths[i]\n                    widths[i] = 0\n                    if excess_width <= 0:\n                        break\n        return super()._collapse_widths(widths, wrapable, max_width)\n"
  },
  {
    "path": "dvc/utils/threadpool.py",
    "content": "from collections.abc import Iterable, Iterator\nfrom concurrent import futures\nfrom itertools import islice\nfrom typing import Any, Callable, Optional, TypeVar\n\n_T = TypeVar(\"_T\")\n\n\nclass ThreadPoolExecutor(futures.ThreadPoolExecutor):\n    def __init__(\n        self,\n        max_workers: Optional[int] = None,\n        cancel_on_error: bool = False,\n        **kwargs,\n    ):\n        super().__init__(max_workers=max_workers, **kwargs)\n        self._cancel_on_error = cancel_on_error\n\n    def imap_unordered(\n        self, fn: Callable[..., _T], *iterables: Iterable[Any]\n    ) -> Iterator[_T]:\n        \"\"\"Lazier version of map that does not preserve ordering of results.\n\n        It does not create all the futures at once to reduce memory usage.\n        \"\"\"\n\n        def create_taskset(n: int) -> set[futures.Future]:\n            return {self.submit(fn, *args) for args in islice(it, n)}\n\n        it = zip(*iterables)\n        tasks = create_taskset(self._max_workers * 5)\n        while tasks:\n            done, tasks = futures.wait(tasks, return_when=futures.FIRST_COMPLETED)\n            for fut in done:\n                yield fut.result()\n            tasks.update(create_taskset(len(done)))\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        cancel_futures = self._cancel_on_error and exc_val is not None\n        self.shutdown(wait=True, cancel_futures=cancel_futures)\n        return False\n"
  },
  {
    "path": "dvc/version.py",
    "content": "try:\n    from ._version import version as __version__  # type: ignore[import]\n    from ._version import version_tuple  # type: ignore[import]\nexcept ImportError:\n    __version__ = \"UNKNOWN\"\n    version_tuple = (0, 0, __version__)  # type: ignore[assignment]\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nbuild-backend = \"setuptools.build_meta\"\nrequires = [\"setuptools>=77\", \"setuptools_scm[toml]>=8\"]\n\n[project]\nname = \"dvc\"\ndescription = \"Git for data scientists - manage your code and data together\"\nreadme = \"README.rst\"\nkeywords = [\n    \"ai\",\n    \"collaboration\",\n    \"data-science\",\n    \"data-version-control\",\n    \"developer-tools\",\n    \"git\",\n    \"machine-learning\",\n    \"reproducibility\",\n]\nlicense = \"Apache-2.0\"\nlicense-files = [\"LICENSE\"]\nmaintainers = [{ name = \"Treeverse\", email = \"support@dvc.org\" }]\nauthors = [{ name = \"Dmitry Petrov\", email = \"dmitry@dvc.org\" }]\nrequires-python = \">=3.9\"\nclassifiers = [\n    \"Development Status :: 4 - Beta\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.9\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n]\ndynamic = [\"version\"]\n\ndependencies = [\n    \"attrs>=22.2.0\",\n    \"celery\",\n    \"colorama>=0.3.9\",\n    \"configobj>=5.0.9\",\n    \"distro>=1.3\",\n    \"dpath<3,>=2.1.0\",\n    \"dulwich\",\n    \"dvc-data>=3.18.2,<3.19.0\",\n    \"dvc-http>=2.29.0\",\n    \"dvc-objects\",\n    \"dvc-render>=1.0.1,<2\",\n    \"dvc-studio-client>=0.21,<1\",\n    \"dvc-task>=0.3.0,<1\",\n    \"flatten-dict<1,>=0.4.1\",\n    \"flufl.lock>=8.1.0,<10\",\n    \"fsspec>=2024.2.0\",\n    \"funcy>=1.14\",\n    \"grandalf<1,>=0.7\",\n    \"gto>=1.6.0,<2\",\n    \"hydra-core>=1.1\",\n    \"iterative-telemetry>=0.0.7\",\n    \"kombu\",\n    \"networkx>=2.5\",\n    \"omegaconf\",\n    \"packaging>=19\",\n    \"pathspec>=0.10.3,<2\",\n    \"platformdirs<5,>=3.1.1\",\n    \"psutil>=5.8\",\n    \"pydot>=1.2.4\",\n    \"pygtrie>=2.3.2\",\n    \"pyparsing>=3.0.0\",\n    \"requests>=2.22\",\n    
\"rich>=12\",\n    \"ruamel.yaml>=0.17.11\",\n    \"scmrepo>=3.5.2,<4\",\n    \"shortuuid>=0.5\",\n    \"shtab<2,>=1.3.4\",\n    \"tabulate>=0.8.7\",\n    \"tomlkit>=0.11.1\",\n    \"tqdm<5,>=4.63.1\",\n    \"voluptuous>=0.11.7\",\n    \"zc.lockfile>=1.2.1\",\n]\n\n[project.optional-dependencies]\nall = [\"dvc[azure,gdrive,gs,hdfs,oss,s3,ssh,webdav,webhdfs]\"]\nazure = [\"dvc-azure>=3.1.0,<4\"]\ndev = [\"dvc[azure,gdrive,gs,hdfs,lint,oss,s3,ssh,tests,webdav,webhdfs]\"]\ngdrive = [\"dvc-gdrive>=3,<4\"]\ngs = [\"dvc-gs>=3.0.2,<4\"]\nhdfs = [\"dvc-hdfs>=3,<4\"]\nlint = [\n    \"mypy==1.19.1\",\n    \"pandas-stubs\",\n    \"types-colorama\",\n    \"types-psutil\",\n    \"types-pyinstaller\",\n    \"types-requests\",\n    \"types-tabulate\",\n    \"types-toml\",\n    \"types-tqdm\",\n    \"typing-extensions\",\n]\noss = [\"dvc-oss>=3,<4\"]\ns3 = [\"dvc-s3>=3.2.1,<4\"]\nssh = [\"dvc-ssh>=4,<5\"]\nssh_gssapi = [\"dvc-ssh[gssapi]>=4,<5\"]\ntesting = [\n  \"pytest-benchmark[histogram]>=5,<6\",\n  \"uv\",\n]\ntests = [\n    \"dvc[testing]\",\n    \"beautifulsoup4>=4.4\",\n    \"dvc-ssh\",\n    \"filelock\",\n    \"pytest>=7,<10\",\n    \"pytest-rerunfailures<17.0\",\n    \"pytest-cov>=4.1.0\",\n    \"pytest-docker>=1,<4\",\n    \"pytest-mock\",\n    \"pytest-timeout>=2\",\n    \"pytest-xdist>=3.2\",\n    'pywin32>=225; sys_platform == \"win32\"', # optional test dependency\n    'tzdata; sys_platform == \"win32\"',  # for testing with celery\n    \"sqlalchemy>=1,<3\", # optional dependency for `import-db`\n    \"pandas>=1\", # optional dependency for `import-db`\n]\nwebdav = [\"dvc-webdav>=3.0.1,<4\"]\nwebhdfs = [\"dvc-webhdfs>=3.1,<4\"]\nwebhdfs_kerberos = [\"dvc-webhdfs[kerberos]>=3.1,<4\"]\n\n[project.urls]\nDocumentation = \"https://dvc.org/doc\"\nIssues = \"https://github.com/treeverse/dvc/issues\"\nSource = \"https://github.com/treeverse/dvc\"\n\n[project.scripts]\ndvc = \"dvc.cli:main\"\n\n[project.entry-points.\"fsspec.specs\"]\ndvc = 
\"dvc.api:DVCFileSystem\"\n\n[project.entry-points.\"universal_pathlib.implementations\"]\ndvc = \"dvc.fs.dvc_path:DVCPath\"\n# universal_pathlib does not support fsspec url chaining yet.\n# see https://github.com/fsspec/universal_pathlib/issues/28.\n\"dvc+http\" = \"dvc.fs.dvc_path:DVCPath\"\n\"dvc+https\" = \"dvc.fs.dvc_path:DVCPath\"\n\"dvc+ssh\" = \"dvc.fs.dvc_path:DVCPath\"\n\n[project.entry-points.\"pyinstaller40\"]\nhook-dirs = \"dvc.__pyinstaller:get_hook_dirs\"\ntests = \"dvc.__pyinstaller:get_PyInstaller_tests\"\n\n[tool.setuptools.packages.find]\nexclude = [\"tests\", \"tests.*\"]\nnamespaces = false\n\n[tool.setuptools_scm]\nwrite_to = \"dvc/_version.py\"\n\n[tool.pytest.ini_options]\naddopts = \"-ra --cov-config pyproject.toml\"\nfilterwarnings = [\n    \"error::ResourceWarning\",\n    \"error::pytest.PytestUnraisableExceptionWarning\",\n    \"error::pytest_mock.PytestMockWarning\",\n    # https://github.com/boto/botocore/issues/2744\n    \"ignore:'urllib3.contrib.pyopenssl' module is deprecated:DeprecationWarning\",\n    # google.cloud: https://github.com/googleapis/python-storage/issues/1000\n    # google.logging: https://github.com/googleapis/python-logging/issues/730\n    # Also happens with `zc.lockfile`.\n    \"ignore:Deprecated call to `pkg_resources.declare_namespace:DeprecationWarning\",\n    # see https://github.com/networkx/networkx/issues/5723.\n    \"ignore:nx.nx_pydot.* depends on the pydot package, which has.*known issues and is not actively maintained:DeprecationWarning\",\n    # TODO: investigate where we are not closing sqlite3.Connection\n    \"ignore:unclosed database.*sqlite3.Connection:ResourceWarning\",\n    \"ignore:unclosed.*<socket.socket:ResourceWarning\",\n]\nmarkers = [\n    \"needs_internet: Might need network access for the tests\",\n    \"studio: Tests verifying contract between DVC and Studio\",\n    \"vscode: Tests verifying contract between DVC and VSCode plugin\",\n]\ntestpaths = [\"tests\"]\nxfail_strict = 
true\n\n[tool.coverage.run]\nbranch = true\nsource = [\"dvc\", \"tests\"]\n\n[tool.coverage.paths]\nsource = [\"dvc\"]\n\n[tool.coverage.report]\nexclude_lines = [\n    \"if __name__ == .__main__.:\",\n    \"if TYPE_CHECKING:\",\n    \"if typing.TYPE_CHECKING:\",\n    \"@overload\",\n    \"pragma: no cover\",\n    \"raise AssertionError\",\n    \"raise NotImplementedError\",\n]\nshow_missing = true\n\n[tool.mypy]\ncheck_untyped_defs = true\nfiles = [\"dvc\"]\nno_implicit_optional = true\npretty = true\nshow_column_numbers = true\nshow_error_codes = true\nshow_error_context = true\nshow_traceback = true\nstrict_equality = true\nextra_checks = true\nwarn_no_return = true\nwarn_redundant_casts = true\nwarn_unreachable = true\nwarn_unused_configs = true\n\n[[tool.mypy.overrides]]\nignore_missing_imports = true\nmodule = [\n    \"agate.*\",\n    \"celery.*\",\n    \"configobj.*\",\n    \"dpath.*\",\n    \"distro\",\n    \"dvc_http\",\n    \"dvc_render.*\",\n    \"dvc_ssh\",\n    \"dvc_studio_client.*\",\n    \"flatten_dict\",\n    \"fsspec.*\",\n    \"funcy.*\",  # https://github.com/Suor/funcy/issues/106,\n    \"grandalf.*\",\n    \"ipdb\",\n    \"iterative_telemetry\",\n    \"kombu.*\",\n    \"networkx.*\",  # https://github.com/networkx/networkx/issues/3988\n    \"pygtrie.*\",\n    \"pyinstrument\",\n    \"pyparsing\",\n    \"pytest_benchmark.*\",\n    \"pytest_docker.plugin\",\n    \"ruamel.*\",\n    \"ruamel.yaml.*\",\n    \"shortuuid\",\n    \"shtab\",\n    \"upath\",\n    \"viztracer\",\n    \"voluptuous\",\n    \"yappi\",\n    \"zc.*\",\n]\n\n[tool.codespell]\nignore-words-list = \"ba,datas,fo,uptodate,cachable,falsy\"\nskip = \"CODE_OF_CONDUCT.md\"\n\n[tool.ruff]\noutput-format = \"full\"\nshow-fixes = true\n\n[tool.ruff.lint]\nignore = [\n    \"PERF203\", # try-except-in-loop\n    \"PLC0415\",  # import-outside-top-level\n    \"PLR2004\", # magic-value-comparison\n    \"PLW2901\", # redefined-loop-name\n    \"S101\", # assert\n    \"SIM105\", # 
suppressible-exception\n    \"SIM108\",  # if-else-block-instead-of-if-exp\n    \"SIM117\", # multiple-with-statements\n    \"TRY003\", # raise-vanilla-args\n    \"TRY300\", # try-consider-else\n]\nselect = [\n    \"F\", \"E\", \"W\", \"C90\", \"I\", \"N\", \"UP\", \"YTT\", \"ASYNC\", \"S\", \"BLE\", \"B\", \"A\", \"C4\", \"DTZ\", \"T10\",\n    \"EXE\", \"ISC\", \"ICN\", \"LOG\", \"G\", \"INP\", \"PIE\", \"T20\", \"PYI\", \"PT\", \"Q\", \"RSE\", \"RET\",\n    \"SLOT\", \"SIM\", \"TID\", \"TC\", \"ARG\", \"PGH\", \"PLC\", \"PLE\", \"PLR\", \"PLW\", \"TRY\",\n    \"FLY\", \"PERF\", \"FURB\", \"RUF\", \"RUF027\", \"RUF029\", \"RUF036\",\n]\npreview = true\nexplicit-preview-rules = true\n\n[tool.ruff.lint.flake8-pytest-style]\nparametrize-names-type = \"csv\"\nraises-extend-require-match-for = [\"dvc.exceptions.DvcException\", \"dvc.scm.SCMError\", \"scmrepo.exceptions.SCMError\"]\n\n[tool.ruff.lint.flake8-tidy-imports]\n[tool.ruff.lint.flake8-tidy-imports.banned-api]\n\"funcy.cached_property\" = {msg = \"use `from dvc.utils.objects import cached_property` instead.\"}\n\n[tool.ruff.lint.flake8-type-checking]\nstrict = true\n\n[tool.ruff.lint.flake8-unused-arguments]\nignore-variadic-names = true\n\n[tool.ruff.lint.isort]\nknown-first-party = [\"dvc\", \"dvc_*\", \"tests\"]\n\n[tool.ruff.lint.pep8-naming]\nextend-ignore-names = [\"M\", \"SCM\"]\n\n[tool.ruff.lint.pylint]\nmax-args = 10\n\n[tool.ruff.lint.per-file-ignores]\n\"dvc/commands/**\" = [\"N806\"]\n\"dvc/testing/**\" = [\"ARG002\"]\n\"dvc/testing/benchmarks/**\" = [\"ARG001\"]\n\"tests/**\" = [\"S\", \"ARG001\", \"ARG002\", \"TRY002\", \"TRY301\", \"PERF\", \"PLR2004\"]\n"
  },
  {
    "path": "tests/__init__.py",
    "content": ""
  },
  {
    "path": "tests/conftest.py",
    "content": "import json\nimport os\nimport sys\nfrom contextlib import ExitStack, suppress\n\nimport pytest\n\nfrom dvc import env\nfrom dvc.stage import PipelineStage\nfrom dvc.testing.fixtures import *  # noqa: F403\n\nfrom .dir_helpers import *  # noqa: F403\nfrom .remotes import *  # noqa: F403\nfrom .scripts import *  # noqa: F403\n\n# Prevent updater and analytics from running their processes\nos.environ[\"DVC_TEST\"] = \"true\"\n# Ensure progress output even when not outputting to raw sys.stderr console\nos.environ[\"DVC_IGNORE_ISATTY\"] = \"true\"\n# Disable system git config\nos.environ[\"GIT_CONFIG_NOSYSTEM\"] = \"1\"\n\nREMOTES = {\n    # remote: enabled_by_default?\n    \"azure\": False,\n    \"gdrive\": False,\n    \"gs\": False,\n    \"hdfs\": True,\n    \"real_hdfs\": False,\n    \"http\": True,\n    \"oss\": False,\n    \"s3\": False,\n    \"ssh\": False,\n    \"webdav\": True,\n}\n\n\n@pytest.fixture(autouse=True)\ndef reset_loglevel(request, caplog):\n    \"\"\"\n    Use it to ensure log level at the start of each test\n    regardless of dvc.logger.setup(), Repo configs or whatever.\n    \"\"\"\n    ini_opt = None\n    with suppress(ValueError):\n        ini_opt = request.config.getini(\"log_level\")\n\n    level = request.config.getoption(\"--log-level\") or ini_opt\n    with ExitStack() as stack:\n        if level:\n            for name in [\"dvc\", \"dvc_data\", \"dvc_objects\"]:\n                stack.enter_context(caplog.at_level(level.upper(), logger=name))\n        yield\n\n\n@pytest.fixture(autouse=True)\ndef enable_ui():\n    from dvc.ui import ui\n\n    ui.enable()\n\n\n@pytest.fixture(autouse=True)\ndef clean_repos():\n    from dvc.repo.open_repo import clean_repos\n\n    clean_repos()\n\n\ndef _get_opt(remote_name, action):\n    return f\"--{action}-{remote_name}\"\n\n\ndef pytest_addoption(parser):\n    \"\"\"Adds remote-related flags to selectively disable/enable for tests\n    Eg: If some remotes, eg: ssh is enabled to be tested 
for by default\n    (see above `REMOTES`), then, `--disable-ssh` flag is added. If remotes\n    like `hdfs` are disabled by default, `--enable-hdfs` is added to make them\n    run.\n\n    You can also make everything run-by-default with `--all` flag, which takes\n    precedence on all previous `--enable-*`/`--disable-*` flags.\n    \"\"\"\n    parser.addoption(\n        \"--all\",\n        action=\"store_true\",\n        default=False,\n        help=\"Test all of the remotes, unless other flags also supplied\",\n    )\n    for remote_name in REMOTES:\n        for action in (\"enable\", \"disable\"):\n            opt = _get_opt(remote_name, action)\n            parser.addoption(\n                opt,\n                action=\"store_true\",\n                default=None,\n                help=f\"{action} tests for {remote_name}\",\n            )\n\n\nclass DVCTestConfig:\n    def __init__(self):\n        self.enabled_remotes = set()\n\n    def requires(self, remote_name):\n        if remote_name not in REMOTES or remote_name in self.enabled_remotes:\n            return\n\n        pytest.skip(f\"{remote_name} tests not enabled through CLI\")\n\n    def apply_marker(self, marker):\n        self.requires(marker.name)\n\n\ndef pytest_runtest_setup(item):\n    # Apply test markers to skip tests selectively\n    # NOTE: this only works on individual tests,\n    # for fixture, use `test_config` fixture and\n    # run `test_config.requires(remote_name)`.\n    for marker in item.iter_markers():\n        item.config.dvc_config.apply_marker(marker)\n\n    if \"CI\" in os.environ and item.get_closest_marker(\"needs_internet\") is not None:\n        # remotes that need internet connection might be flaky,\n        # so we rerun them in case it fails.\n        item.add_marker(pytest.mark.flaky(reruns=5))\n\n\n@pytest.fixture(scope=\"session\")\ndef test_config(request):\n    return request.config.dvc_config\n\n\ndef pytest_configure(config):\n    config.dvc_config = 
DVCTestConfig()\n\n    for remote_name in REMOTES:\n        config.addinivalue_line(\n            \"markers\", f\"{remote_name}: mark test as requiring {remote_name}\"\n        )\n\n    enabled_remotes = config.dvc_config.enabled_remotes\n    if config.getoption(\"--all\"):\n        enabled_remotes.update(REMOTES)\n    else:\n        default_enabled = {k for k, v in REMOTES.items() if v}\n        enabled_remotes.update(default_enabled)\n\n    for remote_name in REMOTES:\n        enabled_opt = _get_opt(remote_name, \"enable\")\n        disabled_opt = _get_opt(remote_name, \"disable\")\n\n        enabled = config.getoption(enabled_opt)\n        disabled = config.getoption(disabled_opt)\n        if disabled and enabled:\n            continue  # default behavior if both flags are supplied\n\n        if disabled:\n            enabled_remotes.discard(remote_name)\n        if enabled:\n            enabled_remotes.add(remote_name)\n\n\n@pytest.fixture\ndef custom_template(tmp_dir, dvc):\n    from dvc_render.vega_templates import SimpleLinearTemplate\n\n    template = tmp_dir / \"custom_template.json\"\n    template.write_text(json.dumps(SimpleLinearTemplate.DEFAULT_CONTENT))\n    return template\n\n\n@pytest.fixture(autouse=True)\ndef mocked_webbrowser_open(mocker):\n    mocker.patch(\"webbrowser.open\")\n\n\n@pytest.fixture(scope=\"session\", autouse=True)\ndef isolate(tmp_path_factory):\n    path = tmp_path_factory.mktemp(\"mock\")\n    home_dir = path / \"home\"\n    home_dir.mkdir()\n\n    monkeypatch = pytest.MonkeyPatch()\n    if sys.platform == \"win32\":\n        home_drive, home_path = os.path.splitdrive(home_dir)\n        monkeypatch.setenv(\"USERPROFILE\", str(home_dir))\n        monkeypatch.setenv(\"HOMEDRIVE\", home_drive)\n        monkeypatch.setenv(\"HOMEPATH\", home_path)\n\n        for env_var, sub_path in ((\"APPDATA\", \"Roaming\"), (\"LOCALAPPDATA\", \"Local\")):\n            path = home_dir / \"AppData\" / sub_path\n            
path.mkdir(parents=True)\n            monkeypatch.setenv(env_var, os.fspath(path))\n    else:\n        monkeypatch.setenv(\"HOME\", str(home_dir))\n        monkeypatch.setenv(\"XDG_CONFIG_HOME\", str(home_dir / \".config\"))\n\n    monkeypatch.setenv(\"GIT_CONFIG_NOSYSTEM\", \"1\")\n    contents = b\"\"\"\n[user]\nname=DVC Tester\nemail=dvctester@example.com\n[init]\ndefaultBranch=master\n\"\"\"\n    (home_dir / \".gitconfig\").write_bytes(contents)\n\n    import pygit2\n\n    pygit2.settings.search_path[pygit2.GIT_CONFIG_LEVEL_GLOBAL] = str(home_dir)\n\n    monkeypatch.setenv(env.DVC_SYSTEM_CONFIG_DIR, os.fspath(path / \"system\"))\n    monkeypatch.setenv(env.DVC_GLOBAL_CONFIG_DIR, os.fspath(path / \"global\"))\n    monkeypatch.setenv(env.DVC_SITE_CACHE_DIR, os.fspath(path / \"site_cache_dir\"))\n\n    yield\n\n    monkeypatch.undo()\n\n\n@pytest.fixture\ndef run_copy_metrics(tmp_dir, copy_script):\n    def run(\n        file1,\n        file2,\n        commit=None,\n        tag=None,\n        single_stage=True,\n        name=None,\n        **kwargs,\n    ):\n        if name:\n            single_stage = False\n\n        stage = tmp_dir.dvc.run(\n            cmd=f\"python copy.py {file1} {file2}\",\n            deps=[file1],\n            single_stage=single_stage,\n            name=name,\n            **kwargs,\n        )\n\n        if hasattr(tmp_dir.dvc, \"scm\"):\n            files = [stage.path]\n            if isinstance(stage, PipelineStage):\n                files += [stage.dvcfile._lockfile.path]\n            files += [out.fs_path for out in stage.outs if not out.use_cache]\n            tmp_dir.dvc.scm.add(files)\n            if commit:\n                tmp_dir.dvc.scm.commit(commit)\n            if tag:\n                tmp_dir.dvc.scm.tag(tag)\n        return stage\n\n    return run\n"
  },
  {
    "path": "tests/dir_helpers.py",
    "content": "\"\"\"\nThe goal of this module is making dvc functional tests setup a breeze. This\nincludes a temporary dir, initializing git and DVC repos and bootstrapping some\nfile structure.\n\nThe cornerstone of these fixtures is `tmp_dir`, which creates a temporary dir\nand changes path to it, it might be combined with `scm` and `dvc` to initialize\nempty git and DVC repos. `tmp_dir` returns a Path instance, which should save\nyou from using `open()`, `os` and `os.path` utils many times:\n\n    (tmp_dir / \"some_file\").write_text(\"some text\")\n    # ...\n    assert \"some text\" == (tmp_dir / \"some_file\").read_text()\n    assert (tmp_dir / \"some_file\").exists()\n\nAdditionally it provides `.gen()`, `.scm_gen()` and `.dvc_gen()` methods to\nbootstrap a required file structure in a single call:\n\n    # Generate a dir with files\n    tmp_dir.gen({\"dir\": {\"file\": \"file text\", \"second_file\": \"...\"}})\n\n    # Generate a single file, dirs will be created along the way\n    tmp_dir.gen(\"dir/file\", \"file text\")\n\n    # Generate + git add\n    tmp_dir.scm_gen({\"file1\": \"...\", ...})\n\n    # Generate + git add + git commit\n    tmp_dir.scm_gen({\"file1\": \"...\", ...}, commit=\"add files\")\n\n    # Generate + dvc add\n    tmp_dir.dvc_gen({\"file1\": \"...\", ...})\n\n    # Generate + dvc add + git commit -am \"...\"\n    # This commits stages to git not the generated files.\n    tmp_dir.dvc_gen({\"file1\": \"...\", ...}, commit=\"add files\")\n\nMaking it easier to bootstrap things has a supergoal of incentivizing a move\nfrom global repo template to creating everything inplace, which:\n\n    - makes all path references local to test, enhancing readability\n    - allows using telling filenames, e.g. 
\"git_tracked_file\" instead of \"foo\"\n    - does not create unnecessary files\n\"\"\"\n\nimport os\n\nimport pytest\nfrom dulwich.porcelain import remote_add as git_remote_add\n\n__all__ = [\n    \"erepo_dir\",\n    \"git_dir\",\n    \"git_downstream\",\n    \"git_upstream\",\n    \"run_head\",\n]\n\n\n@pytest.fixture\ndef run_head(tmp_dir, head_script, dvc):\n    script = os.path.abspath(tmp_dir / \"head.py\")\n\n    def run(*args, **run_kwargs):\n        return dvc.run(\n            **{\n                \"cmd\": \"python {} {}\".format(script, \" \".join(args)),\n                \"outs\": [dep + \"-1\" for dep in args],\n                \"deps\": list(args),\n                **run_kwargs,\n            }\n        )\n\n    return run\n\n\n@pytest.fixture\ndef erepo_dir(make_tmp_dir):\n    return make_tmp_dir(\"erepo\", scm=True, dvc=True)\n\n\n@pytest.fixture\ndef git_dir(make_tmp_dir):\n    path = make_tmp_dir(\"git-erepo\", scm=True)\n    path.scm.commit(\"init repo\")\n    return path\n\n\nclass GitRemote:\n    def __init__(self, tmp_dir, name, url):\n        self.tmp_dir = tmp_dir\n        self.remote = name\n        self.url = url\n\n\n@pytest.fixture\ndef git_upstream(tmp_dir, erepo_dir, git_dir, request):\n    remote = erepo_dir if \"dvc\" in request.fixturenames else git_dir\n    url = f\"file://{remote.resolve().as_posix()}\"\n    git_remote_add(tmp_dir, \"upstream\", url)\n    return GitRemote(remote, \"upstream\", url)\n\n\n@pytest.fixture\ndef git_downstream(tmp_dir, erepo_dir, git_dir, request):\n    remote = erepo_dir if \"dvc\" in request.fixturenames else git_dir\n    url = f\"file://{tmp_dir.resolve().as_posix()}\"\n    git_remote_add(remote, \"upstream\", url)\n    return GitRemote(remote, \"upstream\", url)\n"
  },
  {
    "path": "tests/docker-compose.yml",
    "content": "---\nversion: '3.2'\nservices:\n  git-server:\n    image: ghcr.io/linuxserver/openssh-server\n    environment:\n      - USER_NAME=user\n      - PUBLIC_KEY_FILE=/tmp/key\n    ports:\n      - 2222\n    volumes:\n      - ./remotes/user.key.pub:/tmp/key\n      - ./remotes/git-init:/custom-cont-init.d\n"
  },
  {
    "path": "tests/func/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/api/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/api/test_artifacts.py",
    "content": "from os.path import join, normpath\n\nimport pytest\n\nfrom dvc.api import artifacts_show\nfrom dvc.testing.tmp_dir import make_subrepo\nfrom dvc.utils import as_posix\nfrom tests.func.artifacts.test_artifacts import get_tag_and_name, make_artifact\n\n\n@pytest.mark.parametrize(\"sub\", [\"sub\", \"\"])\ndef test_artifacts_show(tmp_dir, dvc, scm, sub):\n    subdir = tmp_dir / sub\n\n    dirname = str(subdir.relative_to(tmp_dir))\n    tag, name = get_tag_and_name(as_posix(dirname), \"myart\", \"v2.0.0\")\n    make_artifact(tmp_dir, \"myart\", tag, subdir / \"myart.pkl\")\n\n    assert artifacts_show(name) == {\n        \"path\": normpath(join(dirname, \"myart.pkl\")),\n        \"rev\": scm.get_rev(),\n    }\n    assert artifacts_show(name, repo=tmp_dir.fs_path) == {\n        \"path\": normpath(join(dirname, \"myart.pkl\")),\n        \"rev\": scm.get_rev(),\n    }\n    assert artifacts_show(name, repo=f\"file://{tmp_dir.as_posix()}\") == {\n        \"path\": normpath(join(dirname, \"myart.pkl\")),\n        \"rev\": scm.get_rev(),\n    }\n\n    assert artifacts_show(name, repo=subdir.fs_path) == {\n        \"path\": normpath(join(dirname, \"myart.pkl\")),\n        \"rev\": scm.get_rev(),\n    }\n    with subdir.chdir():\n        assert artifacts_show(name) == {\n            \"path\": normpath(join(dirname, \"myart.pkl\")),\n            \"rev\": scm.get_rev(),\n        }\n\n\n@pytest.mark.parametrize(\"sub\", [\"sub\", \"\"])\ndef test_artifacts_show_subrepo(tmp_dir, scm, sub):\n    subrepo = tmp_dir / \"subrepo\"\n    make_subrepo(subrepo, scm)\n    subdir = subrepo / sub\n\n    dirname = str(subdir.relative_to(tmp_dir))\n    tag, name = get_tag_and_name(as_posix(dirname), \"myart\", \"v2.0.0\")\n    make_artifact(subrepo, \"myart\", tag, subdir / \"myart.pkl\")\n\n    assert artifacts_show(name) == {\n        \"path\": join(dirname, \"myart.pkl\"),\n        \"rev\": scm.get_rev(),\n    }\n    assert artifacts_show(name, repo=tmp_dir.fs_path) == {\n    
    \"path\": join(dirname, \"myart.pkl\"),\n        \"rev\": scm.get_rev(),\n    }\n    assert artifacts_show(name, repo=f\"file://{tmp_dir.as_posix()}\") == {\n        \"path\": join(dirname, \"myart.pkl\"),\n        \"rev\": scm.get_rev(),\n    }\n\n    assert artifacts_show(name, repo=subdir.fs_path) == {\n        \"path\": str((subdir / \"myart.pkl\").relative_to(subrepo)),\n        \"rev\": scm.get_rev(),\n    }\n    with subdir.chdir():\n        assert artifacts_show(name) == {\n            \"path\": str((subdir / \"myart.pkl\").relative_to(subrepo)),\n            \"rev\": scm.get_rev(),\n        }\n"
  },
  {
    "path": "tests/func/api/test_data.py",
    "content": "import os\n\nimport pytest\nfrom funcy import first, get_in\n\nfrom dvc import api\nfrom dvc.exceptions import OutputNotFoundError, PathMissingError\nfrom dvc.scm import CloneError\nfrom dvc.testing.api_tests import TestAPI  # noqa: F401\nfrom dvc.testing.tmp_dir import make_subrepo\nfrom dvc.utils.fs import remove\n\n\ndef test_get_url_external(tmp_dir, erepo_dir, cloud):\n    erepo_dir.add_remote(config=cloud.config)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo\", commit=\"add foo\")\n\n    # Using file url to force clone to tmp repo\n    repo_url = f\"file://{erepo_dir.as_posix()}\"\n    expected_url = (cloud / \"files\" / \"md5\" / \"ac/bd18db4cc2f85cedef654fccc4a4d8\").url\n    assert api.get_url(\"foo\", repo=repo_url) == expected_url\n\n\ndef test_get_url_requires_dvc(tmp_dir, scm):\n    tmp_dir.scm_gen({\"foo\": \"foo\"}, commit=\"initial\")\n\n    with pytest.raises(OutputNotFoundError, match=\"output 'foo'\"):\n        api.get_url(\"foo\", repo=os.fspath(tmp_dir))\n\n    with pytest.raises(OutputNotFoundError, match=\"output 'foo'\"):\n        api.get_url(\"foo\", repo=f\"file://{tmp_dir.as_posix()}\")\n\n\ndef test_get_url_from_remote(tmp_dir, erepo_dir, cloud, local_cloud):\n    erepo_dir.add_remote(config=cloud.config, name=\"other\")\n    erepo_dir.add_remote(config=local_cloud.config, default=True)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo\", commit=\"add foo\")\n\n    # Using file url to force clone to tmp repo\n    repo_url = f\"file://{erepo_dir.as_posix()}\"\n    expected_rel_path = os.path.join(\n        \"files\", \"md5\", \"ac/bd18db4cc2f85cedef654fccc4a4d8\"\n    )\n\n    # Test default remote\n    assert api.get_url(\"foo\", repo=repo_url) == (local_cloud / expected_rel_path).url\n\n    # Test remote arg\n    assert (\n        api.get_url(\"foo\", repo=repo_url, remote=\"other\")\n        == (cloud / expected_rel_path).url\n    )\n\n    # Test config arg\n    assert (\n   
     api.get_url(\"foo\", repo=repo_url, config={\"core\": {\"remote\": \"other\"}})\n        == (cloud / expected_rel_path).url\n    )\n\n    # Test remote_config arg\n    assert (\n        api.get_url(\"foo\", repo=repo_url, remote_config={\"url\": cloud.url})\n        == (cloud / expected_rel_path).url\n    )\n\n\ndef test_get_url_ignore_scm(tmp_dir, dvc, cloud, scm):\n    tmp_dir.add_remote(config=cloud.config)\n    tmp_dir.dvc_gen(\"foo\", \"foo\", commit=\"add foo\")\n\n    repo_posix = tmp_dir.as_posix()\n    expected_url = (cloud / \"files\" / \"md5\" / \"ac/bd18db4cc2f85cedef654fccc4a4d8\").url\n\n    # Test baseline with scm\n    assert api.get_url(\"foo\", repo=repo_posix) == expected_url\n\n    # Simulate gitless environment (e.g. deployed container)\n    (tmp_dir / \".git\").rename(tmp_dir / \"gitless_environment\")\n\n    assert api.get_url(\"foo\", repo=repo_posix) == expected_url\n    assert (\n        api.get_url(\"foo\", repo=repo_posix, config={\"core\": {\"no_scm\": True}})\n        == expected_url\n    )\n\n    # Addressing repos with `file://` triggers git, so it fails in a gitless environment\n    repo_url = f\"file://{repo_posix}\"\n    with pytest.raises(\n        CloneError,\n        match=\"SCM error\",\n    ):\n        api.get_url(\"foo\", repo=repo_url, config={\"core\": {\"no_scm\": True}})\n\n\ndef test_open_external(tmp_dir, erepo_dir, cloud):\n    erepo_dir.add_remote(config=cloud.config)\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"version\", \"master\", commit=\"add version\")\n\n        with erepo_dir.branch(\"branch\", new=\"True\"):\n            # NOTE: need file to be other size for Mac\n            erepo_dir.dvc_gen(\"version\", \"branchver\", commit=\"add version\")\n\n    assert erepo_dir.dvc.push(all_branches=True) == 2\n\n    # Remove cache to force download\n    remove(erepo_dir.dvc.cache.local.path)\n\n    # Using file url to force clone to tmp repo\n    repo_url = f\"file://{erepo_dir.as_posix()}\"\n    
with api.open(\"version\", repo=repo_url) as fd:\n        assert fd.read() == \"master\"\n\n    assert api.read(\"version\", repo=repo_url, rev=\"branch\") == \"branchver\"\n\n\ndef test_open_granular(tmp_dir, dvc, remote):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo-text\"}})\n    dvc.push()\n\n    # Remove cache to force download\n    remove(dvc.cache.local.path)\n\n    with api.open(\"dir/foo\") as fd:\n        assert fd.read() == \"foo-text\"\n\n\ndef test_missing(tmp_dir, dvc, remote):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    # Remove cache to make foo missing\n    remove(dvc.cache.local.path)\n\n    api.read(\"foo\")\n\n    remove(\"foo\")\n\n    with pytest.raises(PathMissingError):\n        api.read(\"foo\")\n\n\ndef test_open_scm_controlled(tmp_dir, erepo_dir):\n    erepo_dir.scm_gen({\"scm_controlled\": \"file content\"}, commit=\"create file\")\n\n    with api.open(\"scm_controlled\", repo=os.fspath(erepo_dir)) as fd:\n        assert fd.read() == \"file content\"\n\n\ndef test_open_not_cached(dvc):\n    metric_file = \"metric.txt\"\n    metric_content = \"0.6\"\n    metric_code = f\"open('{metric_file}', 'w').write('{metric_content}')\"\n    dvc.run(\n        name=\"write-metric\",\n        metrics_no_cache=[metric_file],\n        cmd=f'python -c \"{metric_code}\"',\n    )\n\n    with api.open(metric_file) as fd:\n        assert fd.read() == metric_content\n\n    os.remove(metric_file)\n    with pytest.raises(PathMissingError):\n        api.read(metric_file)\n\n\ndef test_open_rev(tmp_dir, scm, dvc):\n    tmp_dir.scm_gen(\"foo\", \"foo\", commit=\"foo\")\n\n    (tmp_dir / \"foo\").write_text(\"bar\")\n\n    with api.open(\"foo\", rev=\"master\") as fobj:\n        assert fobj.read() == \"foo\"\n\n\n@pytest.mark.parametrize(\"as_external\", [True, False])\n@pytest.mark.parametrize(\n    \"files, to_read\",\n    [\n        ({\"foo\": \"foo\"}, \"foo\"),\n        ({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, os.path.join(\"dir\", \"foo\")),\n    
],\n    ids=[\"file\", \"inside-dir\"],\n)\ndef test_api_missing_local_cache_exists_on_remote(\n    tmp_dir, scm, dvc, as_external, remote, files, to_read\n):\n    tmp_dir.dvc_gen(files, commit=\"DVC track files\")\n    dvc.push()\n\n    # Remove cache to make foo missing\n    remove(dvc.cache.local.path)\n    remove(first(files))\n\n    repo_url = f\"file://{tmp_dir.as_posix()}\" if as_external else None\n    file_content = get_in(files, to_read.split(os.sep))\n    assert api.read(to_read, repo=repo_url) == file_content\n\n\n@pytest.mark.parametrize(\"local_repo\", [False, True])\ndef test_read_with_subrepos(tmp_dir, scm, local_cloud, local_repo):\n    tmp_dir.scm_gen(\"foo.txt\", \"foo.txt\", commit=\"add foo.txt\")\n    subrepo = tmp_dir / \"dir\" / \"subrepo\"\n    make_subrepo(subrepo, scm, config=local_cloud.config)\n    with subrepo.chdir():\n        subrepo.scm_gen({\"lorem\": \"lorem\"}, commit=\"add lorem\")\n        subrepo.dvc_gen({\"dir\": {\"file.txt\": \"file.txt\"}}, commit=\"add dir\")\n        subrepo.dvc_gen(\"dvc-file\", \"dvc-file\", commit=\"add dir\")\n        subrepo.dvc.push()\n\n    repo_path = None if local_repo else f\"file://{tmp_dir.as_posix()}\"\n    subrepo_path = os.path.join(\"dir\", \"subrepo\")\n\n    assert api.read(\"foo.txt\", repo=repo_path) == \"foo.txt\"\n    assert api.read(os.path.join(subrepo_path, \"lorem\"), repo=repo_path) == \"lorem\"\n    assert (\n        api.read(os.path.join(subrepo_path, \"dvc-file\"), repo=repo_path) == \"dvc-file\"\n    )\n    assert (\n        api.read(os.path.join(subrepo_path, \"dir\", \"file.txt\"), repo=repo_path)\n        == \"file.txt\"\n    )\n\n\ndef test_get_url_granular(tmp_dir, dvc, cloud):\n    tmp_dir.add_remote(config=cloud.config)\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\", \"nested\": {\"file\": \"file\"}}})\n\n    expected_url = (\n        cloud / \"files\" / \"md5\" / \"5f\" / \"c28ea78987408341668eba6525ebd1.dir\"\n    ).url\n    assert 
api.get_url(\"dir\") == expected_url\n\n    expected_url = (\n        cloud / \"files\" / \"md5\" / \"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\"\n    ).url\n    assert api.get_url(\"dir/foo\") == expected_url\n\n    expected_url = (\n        cloud / \"files\" / \"md5\" / \"37\" / \"b51d194a7513e45b56f6524f2d51f2\"\n    ).url\n    assert api.get_url(\"dir/bar\") == expected_url\n\n    expected_url = (\n        cloud / \"files\" / \"md5\" / \"8c\" / \"7dd922ad47494fc02c388e12c00eac\"\n    ).url\n    assert api.get_url(os.path.join(\"dir\", \"nested\", \"file\")) == expected_url\n\n\ndef test_get_url_subrepos(tmp_dir, scm, local_cloud):\n    subrepo = tmp_dir / \"subrepo\"\n    make_subrepo(subrepo, scm, config=local_cloud.config)\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"dir\": {\"foo\": \"foo\"}, \"bar\": \"bar\"}, commit=\"add files\")\n        subrepo.dvc.push()\n\n    expected_url = os.fspath(\n        local_cloud / \"files\" / \"md5\" / \"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\"\n    )\n    assert api.get_url(os.path.join(\"subrepo\", \"dir\", \"foo\")) == expected_url\n    assert api.get_url(os.path.join(\"subrepo\", \"dir\", \"foo\"), repo=\".\") == expected_url\n\n    expected_url = os.fspath(\n        local_cloud / \"files\" / \"md5\" / \"37\" / \"b51d194a7513e45b56f6524f2d51f2\"\n    )\n    assert api.get_url(\"subrepo/bar\") == expected_url\n    assert api.get_url(\"subrepo/bar\", repo=\".\") == expected_url\n\n\ndef test_open_from_remote(tmp_dir, erepo_dir, cloud, local_cloud):\n    erepo_dir.add_remote(config=cloud.config, name=\"other\")\n    erepo_dir.add_remote(config=local_cloud.config, default=True)\n    erepo_dir.dvc_gen({\"dir\": {\"foo\": \"foo content\"}}, commit=\"create file\")\n    erepo_dir.dvc.push(remote=\"other\")\n    remove(erepo_dir.dvc.cache.local.path)\n\n    with api.open(\n        os.path.join(\"dir\", \"foo\"),\n        repo=f\"file://{erepo_dir.as_posix()}\",\n        remote=\"other\",\n    ) as fd:\n        assert 
fd.read() == \"foo content\"\n\n    with api.open(\n        os.path.join(\"dir\", \"foo\"),\n        repo=f\"file://{erepo_dir.as_posix()}\",\n        config={\"core\": {\"remote\": \"other\"}},\n    ) as fd:\n        assert fd.read() == \"foo content\"\n\n\ndef test_read_from_remote(tmp_dir, erepo_dir, cloud, local_cloud):\n    erepo_dir.add_remote(config=cloud.config, name=\"other\")\n    erepo_dir.add_remote(config=local_cloud.config, default=True)\n    erepo_dir.dvc_gen({\"dir\": {\"foo\": \"foo content\"}}, commit=\"create file\")\n    erepo_dir.dvc.push(remote=\"other\")\n    remove(erepo_dir.dvc.cache.local.path)\n\n    assert (\n        api.read(\n            os.path.join(\"dir\", \"foo\"),\n            repo=f\"file://{erepo_dir.as_posix()}\",\n            remote=\"other\",\n        )\n        == \"foo content\"\n    )\n\n    assert (\n        api.read(\n            os.path.join(\"dir\", \"foo\"),\n            repo=f\"file://{erepo_dir.as_posix()}\",\n            config={\"core\": {\"remote\": \"other\"}},\n        )\n        == \"foo content\"\n    )\n\n    assert (\n        api.read(\n            os.path.join(\"dir\", \"foo\"),\n            repo=f\"file://{erepo_dir.as_posix()}\",\n            remote_config={\"url\": cloud.url},\n        )\n        == \"foo content\"\n    )\n"
  },
  {
    "path": "tests/func/api/test_experiments.py",
    "content": "import re\n\nimport pytest\n\nfrom dvc import api\nfrom dvc.repo.experiments.exceptions import ExperimentExistsError\nfrom tests.unit.repo.experiments.conftest import exp_stage  # noqa: F401\n\n\ndef test_exp_save(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen({\"foo\": \"foo\"}, commit=\"initial\")\n\n    api.exp_save()\n\n    api.exp_save(\"foo\")\n    with pytest.raises(\n        ExperimentExistsError,\n        match=re.escape(\"Experiment conflicts with existing experiment 'foo'.\"),\n    ):\n        api.exp_save(\"foo\")\n    api.exp_save(\"foo\", force=True)\n\n\ndef test_exp_show(tmp_dir, dvc, scm, exp_stage):  # noqa: F811\n    with open(\"params.yaml\", \"a\") as fobj:\n        fobj.write(\"\\nbar: 0\")\n    exps = api.exp_show()\n\n    assert len(exps) == 2\n    assert isinstance(exps, list)\n    assert isinstance(exps[0], dict)\n    assert isinstance(exps[1], dict)\n    # Postprocessing casting to float\n    assert exps[0][\"metrics.yaml:foo\"] == 1.0\n    # Postprocessing using `None` as fill value\n    assert exps[0][\"State\"] is None\n    # Postprocessing empty string as `None`\n    assert exps[0][\"Experiment\"] is None\n    # Postprocessing 0 as float\n    assert exps[0][\"bar\"] == 0.0\n"
  },
  {
    "path": "tests/func/api/test_scm.py",
    "content": "from dvc.api.scm import all_branches, all_commits, all_tags\n\n\ndef test_all_branches(tmp_dir, scm, dvc):\n    assert all_branches() == [\"master\"]\n\n    with tmp_dir.branch(\"branch\", new=True):\n        tmp_dir.scm_gen(\"branch\", \"branch\", \"commit\")\n\n    assert all_branches() == [\"branch\", \"master\"]\n\n\ndef test_all_commits(tmp_dir, scm, dvc):\n    first = scm.get_rev()\n    assert all_commits() == [first]\n\n    tmp_dir.scm_gen(\"foo\", \"foo\", \"commit\")\n    second = scm.get_rev()\n\n    assert set(all_commits()) == {first, second}\n\n\ndef test_all_tags(tmp_dir, scm, dvc):\n    scm.tag(\"v1\")\n    assert all_tags() == [\"v1\"]\n\n    tmp_dir.scm_gen(\"foo\", \"foo\", \"commit\")\n    scm.tag(\"v2\")\n\n    assert set(all_tags()) == {\"v1\", \"v2\"}\n"
  },
  {
    "path": "tests/func/api/test_show.py",
    "content": "import json\nimport os\nfrom textwrap import dedent\n\nimport pytest\n\nfrom dvc import api\n\nTRAIN_METRICS: list[dict[str, dict[str, float]]] = [\n    {\n        \"avg_prec\": {\"train\": 0.85, \"val\": 0.75},\n        \"roc_auc\": {\"train\": 0.80, \"val\": 0.70},\n    },\n    {\n        \"avg_prec\": {\"train\": 0.97, \"val\": 0.92},\n        \"roc_auc\": {\"train\": 0.98, \"val\": 0.94},\n    },\n]\nTEST_METRICS: list[dict[str, dict[str, float]]] = [\n    {\"avg_prec\": {\"test\": 0.72}, \"roc_auc\": {\"test\": 0.77}},\n    {\n        \"avg_prec\": {\"test\": 0.91},\n        \"roc_auc\": {\"test\": 0.92},\n    },\n]\n\n\n@pytest.fixture\ndef params_repo(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\")\n    tmp_dir.gen(\"params.json\", '{\"bar\": 2, \"foobar\": 3}')\n    tmp_dir.gen(\"other_params.json\", '{\"foo\": {\"bar\": 4}}')\n\n    dvc.run(name=\"stage-0\", cmd=\"echo stage-0\")\n\n    dvc.run(name=\"stage-1\", cmd=\"echo stage-1\", params=[\"foo\", \"params.json:bar\"])\n\n    dvc.run(name=\"stage-2\", cmd=\"echo stage-2\", params=[\"other_params.json:foo\"])\n\n    dvc.run(name=\"stage-3\", cmd=\"echo stage-2\", params=[\"params.json:foobar\"])\n\n    scm.add([\"params.yaml\", \"params.json\", \"other_params.json\", \"dvc.yaml\", \"dvc.lock\"])\n    scm.commit(\"commit dvc files\")\n\n    tmp_dir.gen(\"params.yaml\", \"foo: 5\")\n    scm.add([\"params.yaml\"])\n    scm.commit(\"update params.yaml\")\n\n\n@pytest.fixture\ndef metrics_repo(tmp_dir, scm, dvc, run_copy_metrics):\n    dvc.run(name=\"prepare\", cmd=\"echo preparing data\")\n    scm.add([\"dvc.yaml\", \"dvc.lock\"])\n    scm.commit(\"prepare data\")\n    sub_dir = tmp_dir / \"eval\"\n    sub_dir.mkdir()\n    tmp_dir.gen(\"tmp_train_val_metrics.json\", json.dumps(TRAIN_METRICS[0]))\n    train_metrics_file = os.path.join(sub_dir, \"train_val_metrics.json\")\n    run_copy_metrics(\n        \"tmp_train_val_metrics.json\",\n        train_metrics_file,\n        
name=\"train\",\n        metrics_no_cache=[train_metrics_file],\n    )\n    (tmp_dir / \"tmp_train_val_metrics.json\").unlink()\n\n    scm.add([\"dvc.yaml\", \"dvc.lock\", train_metrics_file])\n    scm.commit(\"train model\")\n\n    test_metrics_file = os.path.join(sub_dir, \"test_metrics.json\")\n    tmp_dir.gen(\"tmp_test_metrics.json\", json.dumps(TEST_METRICS[0]))\n    run_copy_metrics(\n        \"tmp_test_metrics.json\",\n        test_metrics_file,\n        name=\"test\",\n        metrics_no_cache=[test_metrics_file],\n    )\n    (tmp_dir / \"tmp_test_metrics.json\").unlink()\n\n    scm.add([\"dvc.yaml\", \"dvc.lock\", test_metrics_file])\n    scm.commit(\"test model\")\n\n    with tmp_dir.branch(\"better-model\", new=True):\n        tmp_dir.gen(\"tmp_train_val_metrics.json\", json.dumps(TRAIN_METRICS[1]))\n        run_copy_metrics(\n            \"tmp_train_val_metrics.json\",\n            train_metrics_file,\n            name=\"train\",\n            metrics_no_cache=[train_metrics_file],\n        )\n        (tmp_dir / \"tmp_train_val_metrics.json\").unlink()\n\n        scm.add([\"dvc.yaml\", \"dvc.lock\", train_metrics_file])\n        scm.commit(\"train better model\")\n\n        tmp_dir.gen(\"tmp_test_metrics.json\", json.dumps(TEST_METRICS[1]))\n        run_copy_metrics(\n            \"tmp_test_metrics.json\",\n            test_metrics_file,\n            name=\"test\",\n            metrics_no_cache=[test_metrics_file],\n        )\n        (tmp_dir / \"tmp_test_metrics.json\").unlink()\n\n        scm.add([\"dvc.yaml\", \"dvc.lock\", test_metrics_file])\n        scm.commit(\"test better model\")\n\n    scm.checkout(\"master\")\n\n    return (\n        os.path.relpath(train_metrics_file, tmp_dir),\n        os.path.relpath(test_metrics_file, tmp_dir),\n    )\n\n\ndef test_params_show_no_args(params_repo):\n    assert api.params_show() == {\n        \"params.yaml:foo\": 5,\n        \"bar\": 2,\n        \"foobar\": 3,\n        \"other_params.json:foo\": {\"bar\": 
4},\n    }\n\n\ndef test_params_show_targets(params_repo):\n    assert api.params_show(\"params.yaml\") == {\"foo\": 5}\n    assert api.params_show(\"params.yaml\", \"params.json\") == {\n        \"foo\": 5,\n        \"bar\": 2,\n        \"foobar\": 3,\n    }\n    assert api.params_show(\"params.yaml\", stages=\"stage-1\") == {\"bar\": 2, \"foo\": 5}\n\n\ndef test_params_show_deps(params_repo):\n    params = api.params_show(deps=True)\n    assert params == {\n        \"params.yaml:foo\": 5,\n        \"bar\": 2,\n        \"foobar\": 3,\n        \"other_params.json:foo\": {\"bar\": 4},\n    }\n\n\ndef test_params_show_stages(params_repo):\n    assert api.params_show(stages=\"stage-2\") == {\"foo\": {\"bar\": 4}}\n\n    assert api.params_show() == api.params_show(\n        stages=[\"stage-1\", \"stage-2\", \"stage-3\"]\n    )\n\n    assert api.params_show(\"params.json\", stages=\"stage-3\") == {\"bar\": 2, \"foobar\": 3}\n\n    assert api.params_show(stages=\"stage-0\") == {}\n\n\ndef test_params_show_stage_addressing(tmp_dir, dvc):\n    for subdir in (\"subdir1\", \"subdir2\"):\n        subdir = tmp_dir / subdir\n        subdir.mkdir()\n        with subdir.chdir():\n            subdir.gen(\"params.yaml\", \"foo: 1\")\n\n            dvc.run(name=\"stage-0\", cmd=\"echo stage-0\", params=[\"foo\"])\n\n    for s in (\"subdir1\", \"subdir2\"):\n        dvcyaml = os.path.join(s, \"dvc.yaml\")\n        assert api.params_show(stages=f\"{dvcyaml}:stage-0\") == {\"foo\": 1}\n\n    with subdir.chdir():\n        nested = subdir / \"nested\"\n        nested.mkdir()\n        with nested.chdir():\n            dvcyaml = os.path.join(\"..\", \"dvc.yaml\")\n            assert api.params_show(stages=f\"{dvcyaml}:stage-0\") == {\"foo\": 1}\n\n\ndef test_params_show_revs(params_repo):\n    assert api.params_show(rev=\"HEAD~1\") == {\n        \"params.yaml:foo\": 1,\n        \"bar\": 2,\n        \"foobar\": 3,\n        \"other_params.json:foo\": {\"bar\": 4},\n    }\n\n\ndef 
test_params_show_while_running_stage(tmp_dir, dvc):\n    (tmp_dir / \"params.yaml\").dump({\"foo\": {\"bar\": 1}})\n    (tmp_dir / \"params.json\").dump({\"bar\": 2})\n\n    tmp_dir.gen(\n        \"merge.py\",\n        dedent(\n            \"\"\"\n            import json\n            from dvc import api\n            with open(\"merged.json\", \"w\") as f:\n                json.dump(api.params_show(stages=\"merge\"), f)\n        \"\"\"\n        ),\n    )\n    dvc.stage.add(\n        name=\"merge\",\n        cmd=\"python merge.py\",\n        params=[\"foo.bar\", {\"params.json\": [\"bar\"]}],\n        outs=[\"merged.json\"],\n    )\n\n    dvc.reproduce()\n\n    assert (tmp_dir / \"merged.json\").parse() == {\"foo\": {\"bar\": 1}, \"bar\": 2}\n\n\ndef test_params_show_repo(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(\"params.yaml\", \"foo: 1\", commit=\"Create params.yaml\")\n        erepo_dir.dvc.run(name=\"stage-1\", cmd=\"echo stage-1\", params=[\"foo\"])\n    assert api.params_show(repo=erepo_dir) == {\"foo\": 1}\n\n\ndef test_params_show_no_params_found(tmp_dir, dvc):\n    # Empty repo\n    assert api.params_show() == {}\n\n    # params.yaml but no dvc.yaml\n    (tmp_dir / \"params.yaml\").dump({\"foo\": 1})\n    assert api.params_show() == {\"foo\": 1}\n\n    # dvc.yaml but no params.yaml\n    (tmp_dir / \"params.yaml\").unlink()\n    dvc.stage.add(name=\"echo\", cmd=\"echo foo\")\n    assert api.params_show() == {}\n\n\ndef test_params_show_stage_without_params(tmp_dir, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\")\n\n    dvc.run(name=\"stage-0\", cmd=\"echo stage-0\")\n\n    assert api.params_show(stages=\"stage-0\") == {}\n\n    assert api.params_show(deps=True) == {}\n\n\ndef test_params_show_untracked_target(params_repo, tmp_dir):\n    tmp_dir.gen(\"params_foo.yaml\", \"foo: 1\")\n\n    assert api.params_show(\"params_foo.yaml\") == {\"foo\": 1}\n\n    assert api.params_show(\"params_foo.yaml\", stages=\"stage-0\") == 
{\"foo\": 1}\n\n\ndef test_metrics_show_no_args(metrics_repo):\n    train_metrics_file, test_metrics_file = metrics_repo\n    assert api.metrics_show() == {\n        f\"{train_metrics_file}:avg_prec\": TRAIN_METRICS[0][\"avg_prec\"],\n        f\"{train_metrics_file}:roc_auc\": TRAIN_METRICS[0][\"roc_auc\"],\n        f\"{test_metrics_file}:avg_prec\": TEST_METRICS[0][\"avg_prec\"],\n        f\"{test_metrics_file}:roc_auc\": TEST_METRICS[0][\"roc_auc\"],\n    }\n\n\ndef test_metrics_show_targets(metrics_repo):\n    train_metrics_file, test_metrics_file = metrics_repo\n    assert api.metrics_show(train_metrics_file) == TRAIN_METRICS[0]\n    assert api.metrics_show(test_metrics_file) == TEST_METRICS[0]\n    assert api.metrics_show(train_metrics_file, test_metrics_file) == {\n        f\"{train_metrics_file}:avg_prec\": TRAIN_METRICS[0][\"avg_prec\"],\n        f\"{train_metrics_file}:roc_auc\": TRAIN_METRICS[0][\"roc_auc\"],\n        f\"{test_metrics_file}:avg_prec\": TEST_METRICS[0][\"avg_prec\"],\n        f\"{test_metrics_file}:roc_auc\": TEST_METRICS[0][\"roc_auc\"],\n    }\n\n\ndef test_metrics_show_no_metrics_found(tmp_dir, dvc):\n    # Empty repo\n    assert api.metrics_show() == {}\n\n    # dvc.yaml but no metrics\n    dvc.stage.add(name=\"echo\", cmd=\"echo foo\")\n    assert api.metrics_show() == {}\n\n\ndef test_metrics_show_rev_without_metrics(metrics_repo):\n    assert api.metrics_show(rev=\"HEAD~2\") == {}\n\n\ndef test_metrics_show_rev_with_metrics(metrics_repo):\n    train_metrics_file, test_metrics_file = metrics_repo\n    assert api.metrics_show(rev=\"HEAD~1\") == TRAIN_METRICS[0]\n    assert api.metrics_show(rev=\"HEAD\") == {\n        f\"{train_metrics_file}:avg_prec\": TRAIN_METRICS[0][\"avg_prec\"],\n        f\"{train_metrics_file}:roc_auc\": TRAIN_METRICS[0][\"roc_auc\"],\n        f\"{test_metrics_file}:avg_prec\": TEST_METRICS[0][\"avg_prec\"],\n        f\"{test_metrics_file}:roc_auc\": TEST_METRICS[0][\"roc_auc\"],\n    }\n    assert 
api.metrics_show(rev=\"better-model~1\") == {\n        f\"{train_metrics_file}:avg_prec\": TRAIN_METRICS[1][\"avg_prec\"],\n        f\"{train_metrics_file}:roc_auc\": TRAIN_METRICS[1][\"roc_auc\"],\n        f\"{test_metrics_file}:avg_prec\": TEST_METRICS[0][\"avg_prec\"],\n        f\"{test_metrics_file}:roc_auc\": TEST_METRICS[0][\"roc_auc\"],\n    }\n    assert api.metrics_show(rev=\"better-model\") == {\n        f\"{train_metrics_file}:avg_prec\": TRAIN_METRICS[1][\"avg_prec\"],\n        f\"{train_metrics_file}:roc_auc\": TRAIN_METRICS[1][\"roc_auc\"],\n        f\"{test_metrics_file}:avg_prec\": TEST_METRICS[1][\"avg_prec\"],\n        f\"{test_metrics_file}:roc_auc\": TEST_METRICS[1][\"roc_auc\"],\n    }\n\n\ndef test_metrics_show_dirty_working_dir(metrics_repo, tmp_dir):\n    train_metrics_file, test_metrics_file = metrics_repo\n    new_metrics = {\"acc\": 1}\n    (tmp_dir / train_metrics_file).unlink()\n    (tmp_dir / train_metrics_file).dump(new_metrics)\n    (tmp_dir / test_metrics_file).unlink()\n    (tmp_dir / test_metrics_file).dump(new_metrics)\n\n    assert api.metrics_show() == {\n        f\"{train_metrics_file}:acc\": new_metrics[\"acc\"],\n        f\"{test_metrics_file}:acc\": new_metrics[\"acc\"],\n    }\n"
  },
  {
    "path": "tests/func/artifacts/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/artifacts/test_artifacts.py",
    "content": "import logging\nimport os\nfrom copy import deepcopy\n\nimport pytest\n\nfrom dvc import env\nfrom dvc.annotations import Artifact\nfrom dvc.exceptions import ArtifactNotFoundError, InvalidArgumentError\nfrom dvc.repo.artifacts import Artifacts, check_name_format\nfrom dvc.testing.tmp_dir import make_subrepo\nfrom dvc.utils import as_posix\nfrom dvc.utils.strictyaml import YAMLSyntaxError, YAMLValidationError\n\ndvcyaml = {\n    \"artifacts\": {\n        \"myart\": {\"type\": \"model\", \"path\": \"myart.pkl\"},\n        \"hello\": {\"type\": \"file\", \"path\": \"hello.txt\"},\n        \"world\": {\n            \"type\": \"object\",\n            \"path\": \"world.txt\",\n            \"desc\": \"The world is not enough\",\n            \"labels\": [\"but\", \"this\", \"is\"],\n            \"meta\": {\"such\": \"a\", \"perfect\": \"place to start\"},\n        },\n    }\n}\n\n\ndef test_artifacts_read_subdir(tmp_dir, dvc):\n    (tmp_dir / \"dvc.yaml\").dump(dvcyaml)\n\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n\n    (subdir / \"dvc.yaml\").dump(dvcyaml)\n\n    artifacts = {\n        name: Artifact(**values) for name, values in dvcyaml[\"artifacts\"].items()\n    }\n    assert tmp_dir.dvc.artifacts.read() == {\n        \"dvc.yaml\": artifacts,\n        f\"subdir{os.path.sep}dvc.yaml\": artifacts,\n    }\n\n\ndef test_artifacts_read_bad_name(tmp_dir, dvc, caplog):\n    bad_name_dvcyaml = deepcopy(dvcyaml)\n    bad_name_dvcyaml[\"artifacts\"][\"_bad_name_\"] = {\"type\": \"model\", \"path\": \"bad.pkl\"}\n\n    (tmp_dir / \"dvc.yaml\").dump(bad_name_dvcyaml)\n\n    artifacts = {\n        name: Artifact(**values)\n        for name, values in bad_name_dvcyaml[\"artifacts\"].items()\n    }\n\n    with caplog.at_level(logging.WARNING):\n        assert tmp_dir.dvc.artifacts.read() == {\"dvc.yaml\": artifacts}\n        assert \"Can't use '_bad_name_' as artifact name (ID)\" in caplog.text\n\n\ndef test_artifacts_add_subdir(tmp_dir, dvc):\n    
subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n\n    (subdir / \"dvc.yaml\").dump(dvcyaml)\n\n    new_art = Artifact(path=\"path\")\n    tmp_dir.dvc.artifacts.add(\"new\", new_art, dvcfile=\"subdir/dvc.yaml\")\n\n    artifacts = {\n        name: Artifact(**values) for name, values in dvcyaml[\"artifacts\"].items()\n    }\n    artifacts[\"new\"] = new_art\n    assert tmp_dir.dvc.artifacts.read() == {f\"subdir{os.path.sep}dvc.yaml\": artifacts}\n\n\ndef test_artifacts_add_abspath(tmp_dir, dvc):\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n\n    new_art = Artifact(path=\"path\")\n    tmp_dir.dvc.artifacts.add(\n        \"new\", new_art, dvcfile=os.path.abspath(\"subdir/dvc.yaml\")\n    )\n\n    assert tmp_dir.dvc.artifacts.read() == {\n        f\"subdir{os.path.sep}dvc.yaml\": {\"new\": new_art},\n    }\n\n\ndef test_artifacts_add_fails_on_dvc_subrepo(tmp_dir, dvc):\n    # adding artifact to the DVC subrepo from the parent DVC repo\n    # shouldn't work\n    subdir = tmp_dir / \"subdir\"\n    (subdir / \".dvc\").mkdir(parents=True)\n\n    with pytest.raises(InvalidArgumentError):\n        tmp_dir.dvc.artifacts.add(\n            \"failing\", Artifact(path=\"path\"), dvcfile=\"subdir/dvc.yaml\"\n        )\n\n    with pytest.raises(InvalidArgumentError):\n        tmp_dir.dvc.artifacts.add(\n            \"failing\", Artifact(path=\"path\"), dvcfile=\"subdir/dvclive/dvc.yaml\"\n        )\n\n\nbad_dvcyaml_extra_field = {\n    \"artifacts\": {\n        \"lol\": {\"kek\": \"cheburek\", \"path\": \"lol\"},\n        \"hello\": {\"type\": \"file\", \"path\": \"hello.txt\"},\n    }\n}\n\n\nbad_dvcyaml_missing_path = {\"artifacts\": {\"lol\": {}}}\n\n\n@pytest.mark.parametrize(\n    \"bad_dvcyaml\", [bad_dvcyaml_extra_field, bad_dvcyaml_missing_path]\n)\ndef test_broken_dvcyaml_extra_field(tmp_dir, dvc, bad_dvcyaml):\n    (tmp_dir / \"dvc.yaml\").dump(bad_dvcyaml)\n\n    with pytest.raises(YAMLValidationError):\n        
tmp_dir.dvc.artifacts.read()\n\n\nbad_dvcyaml_id_duplication = \"\"\"\nartifacts:\n  lol:\n    type: kek\n  lol: {}\n\"\"\"\n\n\ndef test_artifacts_read_fails_on_id_duplication(tmp_dir, dvc):\n    with open(tmp_dir / \"dvc.yaml\", \"w\") as f:\n        f.write(bad_dvcyaml_id_duplication)\n\n    with pytest.raises(YAMLSyntaxError):\n        tmp_dir.dvc.artifacts.read()\n\n\n@pytest.mark.parametrize(\n    \"name\", [\"1\", \"m\", \"nn\", \"m1\", \"1nn\", \"model-prod\", \"model-prod-v1\"]\n)\ndef test_name_is_compatible(name):\n    check_name_format(name)\n\n\n@pytest.mark.parametrize(\n    \"name\",\n    [\n        \"\",\n        \"m/\",\n        \"/m\",\n        \"###\",\n        \"@@@\",\n        \"a model\",\n        \"-model\",\n        \"model-\",\n        \"model@1\",\n        \"model#1\",\n        \"@namespace/model\",\n    ],\n)\ndef test_name_is_compatible_fails(name):\n    with pytest.raises(InvalidArgumentError):\n        check_name_format(name)\n\n\ndef test_get_rev(tmp_dir, dvc, scm):\n    scm.tag(\"myart@v1.0.0#1\", annotated=True, message=\"foo\")\n    scm.tag(\"subdir=myart@v2.0.0#1\", annotated=True, message=\"foo\")\n    scm.tag(\"myart#dev#1\", annotated=True, message=\"foo\")\n    rev = scm.get_rev()\n\n    assert dvc.artifacts.get_rev(\"myart\") == rev\n    assert dvc.artifacts.get_rev(\"myart\", version=\"v1.0.0\") == rev\n    assert dvc.artifacts.get_rev(\"subdir:myart\", version=\"v2.0.0\") == rev\n    assert dvc.artifacts.get_rev(\"subdir/dvc.yaml:myart\", version=\"v2.0.0\") == rev\n    with pytest.raises(ArtifactNotFoundError):\n        dvc.artifacts.get_rev(\"myart\", version=\"v3.0.0\")\n    with pytest.raises(ArtifactNotFoundError):\n        dvc.artifacts.get_rev(\"myart\", stage=\"prod\")\n\n\ndef test_get_path(tmp_dir, dvc, scm):\n    (tmp_dir / \"dvc.yaml\").dump(dvcyaml)\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n    (subdir / \"dvc.yaml\").dump(dvcyaml)\n\n    assert dvc.artifacts.get_path(\"myart\") == \"myart.pkl\"\n 
   assert dvc.artifacts.get_path(\"subdir:myart\") == os.path.join(\"subdir\", \"myart.pkl\")\n    assert dvc.artifacts.get_path(\"subdir/dvc.yaml:myart\") == os.path.join(\n        \"subdir\", \"myart.pkl\"\n    )\n\n\ndef test_parametrized(tmp_dir, dvc):\n    (tmp_dir / \"params.yaml\").dump({\"path\": \"myart.pkl\"})\n    (tmp_dir / \"dvc.yaml\").dump(\n        {\"artifacts\": {\"myart\": {\"type\": \"model\", \"path\": \"${path}\"}}}\n    )\n    assert tmp_dir.dvc.artifacts.read() == {\n        \"dvc.yaml\": {\"myart\": Artifact(path=\"myart.pkl\", type=\"model\")}\n    }\n\n\ndef test_get_path_subrepo(tmp_dir, scm, dvc):\n    subrepo = tmp_dir / \"subrepo\"\n    make_subrepo(subrepo, scm)\n    (subrepo / \"dvc.yaml\").dump(dvcyaml)\n\n    assert dvc.artifacts.get_path(\"subrepo:myart\") == os.path.join(\n        \"subrepo\", \"myart.pkl\"\n    )\n    assert dvc.artifacts.get_path(\"subrepo/dvc.yaml:myart\") == os.path.join(\n        \"subrepo\", \"myart.pkl\"\n    )\n\n    assert subrepo.dvc.artifacts.get_path(\"subrepo:myart\") == os.path.join(\n        \"subrepo\", \"myart.pkl\"\n    )\n    assert subrepo.dvc.artifacts.get_path(\"subrepo/dvc.yaml:myart\") == os.path.join(\n        \"subrepo\", \"myart.pkl\"\n    )\n\n\ndef get_tag_and_name(dirname, name, version):\n    tagname = f\"{name}@{version}\"\n    if dirname in (os.curdir, \"\"):\n        return tagname, name\n    return f\"{dirname}={tagname}\", f\"{dirname}:{name}\"\n\n\ndef make_artifact(tmp_dir, name, tag, path) -> Artifact:\n    artifact = Artifact(path=path.name, type=\"model\")\n    dvcfile = path.with_name(\"dvc.yaml\")\n\n    tmp_dir.scm_gen(path, \"hello_world\", commit=\"add myart.pkl\")\n    tmp_dir.dvc.artifacts.add(name, artifact, dvcfile=os.fspath(dvcfile))\n    tmp_dir.scm.add_commit([dvcfile], message=\"add dvc.yaml\")\n    tmp_dir.scm.tag(tag, annotated=True, message=\"foo\")\n    return artifact\n\n\n@pytest.mark.parametrize(\"sub\", [\"sub\", \"\"])\ndef 
test_artifacts_download(tmp_dir, dvc, scm, sub):\n    subdir = tmp_dir / sub\n    dirname = str(subdir.relative_to(tmp_dir))\n    tag, name = get_tag_and_name(as_posix(dirname), \"myart\", \"v2.0.0\")\n    make_artifact(tmp_dir, \"myart\", tag, subdir / \"myart.pkl\")\n\n    result = (1, \"myart.pkl\")\n    assert Artifacts.get(\".\", name, force=True) == result\n    assert Artifacts.get(tmp_dir.fs_path, name, force=True) == result\n    assert Artifacts.get(f\"file://{tmp_dir.as_posix()}\", name, force=True) == result\n    assert Artifacts.get(subdir.fs_path, name, force=True) == result\n    with subdir.chdir():\n        assert Artifacts.get(\".\", name, force=True) == result\n\n\n@pytest.mark.parametrize(\"sub\", [\"sub\", \"\"])\ndef test_artifacts_download_subrepo(tmp_dir, scm, sub):\n    subrepo = tmp_dir / \"subrepo\"\n    make_subrepo(subrepo, scm)\n    subdir = subrepo / sub\n\n    dirname = str(subdir.relative_to(tmp_dir))\n    tag, name = get_tag_and_name(as_posix(dirname), \"myart\", \"v2.0.0\")\n    make_artifact(subrepo, \"myart\", tag, subdir / \"myart.pkl\")\n\n    result = (1, \"myart.pkl\")\n    assert Artifacts.get(\".\", name) == result\n    assert Artifacts.get(tmp_dir.fs_path, name, force=True) == result\n    assert Artifacts.get(f\"file://{tmp_dir.as_posix()}\", name, force=True) == result\n    assert Artifacts.get(subdir.fs_path, name, force=True) == result\n    with subdir.chdir():\n        assert Artifacts.get(\".\", name, force=True) == result\n\n\ndef test_artifacts_download_studio(monkeypatch, tmp_dir, dvc, mocker, tmp_path_factory):\n    global_config_dir = tmp_path_factory.mktemp(\"global_config\")\n    monkeypatch.setenv(env.DVC_GLOBAL_CONFIG_DIR, str(global_config_dir))\n    # reset config to ensure it picks up the new global config\n    dvc.__dict__.pop(\"config\")\n    assert dvc.config.files[\"global\"] == os.fspath(global_config_dir / \"config\")\n\n    with dvc.config.edit(\"global\") as conf:\n        conf[\"studio\"][\"token\"] 
= \"mytoken\"\n\n    download_studio = mocker.patch(\"dvc.repo.artifacts.Artifacts._download_studio\")\n    Artifacts.get(\"myart.pkl\", \"myart.pkl\")\n    assert download_studio.call_args.kwargs[\"dvc_studio_config\"][\"token\"] == \"mytoken\"\n"
  },
  {
    "path": "tests/func/data/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/data/db/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/data/db/test_index.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.exceptions import DownloadError, UploadError\nfrom dvc.utils.fs import remove\nfrom dvc_data.hashfile.db import get_index\n\n\n@pytest.fixture\ndef index(tmp_dir, dvc, local_remote):\n    odb = dvc.cloud.get_remote_odb(\"upstream\")\n    return get_index(odb)\n\n\ndef test_indexed_on_status(tmp_dir, dvc, index):\n    foo = tmp_dir.dvc_gen({\"foo\": \"foo content\"})[0].outs[0]\n    bar = tmp_dir.dvc_gen({\"bar\": {\"baz\": \"baz content\"}})[0].outs[0]\n    baz_hash = bar.obj._trie.get((\"baz\",))[1]\n    dvc.push()\n    index.clear()\n\n    dvc.status(cloud=True)\n    assert {bar.hash_info.value, baz_hash.value} == set(index.hashes())\n    assert [bar.hash_info.value] == list(index.dir_hashes())\n    assert foo.hash_info.value not in index.hashes()\n\n\ndef test_indexed_on_push(tmp_dir, dvc, index):\n    foo = tmp_dir.dvc_gen({\"foo\": \"foo content\"})[0].outs[0]\n    bar = tmp_dir.dvc_gen({\"bar\": {\"baz\": \"baz content\"}})[0].outs[0]\n    baz_hash = bar.obj._trie.get((\"baz\",))[1]\n\n    dvc.push()\n    assert {bar.hash_info.value, baz_hash.value} == set(index.hashes())\n    assert [bar.hash_info.value] == list(index.dir_hashes())\n    assert foo.hash_info.value not in index.hashes()\n\n\ndef test_indexed_dir_missing(tmp_dir, dvc, index):\n    bar = tmp_dir.dvc_gen({\"bar\": {\"baz\": \"baz content\"}})[0].outs[0]\n    index.update([bar.hash_info.value], [])\n    dvc.status(cloud=True)\n    assert not list(index.hashes())\n\n\ndef test_clear_on_gc(tmp_dir, dvc, index):\n    (foo,) = tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo content\"}})\n    dvc.push()\n    dvc.remove(foo.relpath)\n\n    assert list(index.hashes())\n    dvc.gc(workspace=True, cloud=True)\n    assert not list(index.hashes())\n\n\ndef test_clear_on_download_err(tmp_dir, dvc, index, mocker):\n    out = tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo content\"}})[0].outs[0]\n    dvc.push()\n\n    assert list(index.hashes())\n\n    for _, _, hi in 
out.obj:\n        remove(dvc.cache.local.get(hi.value).path)\n        remove(dvc.cloud.get_remote().odb.get(hi.value).path)\n    remove(out.fs_path)\n\n    with pytest.raises(DownloadError):\n        dvc.pull()\n    assert not list(index.hashes())\n\n\ndef test_partial_upload(tmp_dir, dvc, index, mocker):\n    from dvc_objects.fs import generic\n\n    tmp_dir.dvc_gen({\"foo\": \"foo content\"})\n    baz = tmp_dir.dvc_gen({\"bar\": {\"baz\": \"baz content\"}})[0].outs[0]\n\n    original = generic.transfer\n    odb = dvc.cloud.get_remote_odb(\"upstream\")\n\n    def unreliable_upload(from_fs, from_info, to_fs, to_info, **kwargs):\n        on_error = kwargs[\"on_error\"]\n        assert on_error\n        if isinstance(from_info, str):\n            from_info = [from_info]\n        else:\n            from_info = list(from_info)\n        if isinstance(to_info, str):\n            to_info = [to_info]\n        else:\n            to_info = list(to_info)\n        for i in range(len(from_info) - 1, -1, -1):\n            from_i = from_info[i]\n            to_i = to_info[i]\n            if os.path.abspath(to_i) == os.path.abspath(\n                odb.get(baz.hash_info.value).path\n            ):\n                if on_error:\n                    on_error(from_i, to_i, Exception(\"stop baz\"))\n                del from_info[i]\n                del to_info[i]\n\n        return original(from_fs, from_info, to_fs, to_info, **kwargs)\n\n    mocker.patch(\"dvc_objects.fs.generic.transfer\", unreliable_upload)\n    with pytest.raises(UploadError):\n        dvc.push()\n    assert not list(index.hashes())\n"
  },
  {
    "path": "tests/func/experiments/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/experiments/conftest.py",
    "content": "import pytest\n\nfrom tests.unit.repo.experiments.conftest import (  # noqa: F401\n    exp_stage,\n    failed_exp_stage,\n    session_app,\n    session_queue,\n    session_worker,\n    test_queue,\n)\n\n\n@pytest.fixture\ndef http_auth_patch(mocker):\n    from dulwich.client import HTTPUnauthorized\n\n    url = \"https://0.0.0.0\"\n    client = mocker.MagicMock()\n    client.get_refs.side_effect = HTTPUnauthorized(\"\", url)\n    client.send_pack.side_effect = HTTPUnauthorized(\"\", url)\n\n    patch = mocker.patch(\"dulwich.client.get_transport_and_path\")\n    patch.return_value = (client, url)\n    return url\n\n\n@pytest.fixture(params=[True, False])\ndef workspace(request, session_queue) -> bool:  # noqa: F811\n    return request.param\n\n\n@pytest.fixture\ndef params_repo(tmp_dir, scm, dvc):\n    (tmp_dir / \"params.yaml\").dump(\n        {\"foo\": [{\"bar\": 1}, {\"baz\": 2}], \"goo\": {\"bag\": 3.0}, \"lorem\": False}\n    )\n    dvc.run(cmd=\"echo foo\", params=[\"params.yaml:\"], name=\"foo\")\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"copy.py\", \"params.yaml\"])\n    scm.commit(\"init\")\n"
  },
  {
    "path": "tests/func/experiments/executor/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/experiments/test_apply.py",
    "content": "import pytest\nfrom funcy import first\n\nfrom dvc.repo.experiments.refs import CELERY_STASH\n\n\ndef test_apply(tmp_dir, scm, dvc, exp_stage):\n    from dvc.exceptions import InvalidArgumentError\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], tmp_dir=True)\n    exp_a = first(results)\n\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=3\"], tmp_dir=True, name=\"foo\"\n    )\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.apply(\"bar\")\n\n    dvc.experiments.apply(exp_a)\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 2\"\n    assert (tmp_dir / \"metrics.yaml\").read_text().strip() == \"foo: 2\"\n\n    dvc.experiments.apply(\"foo\")\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 3\"\n    assert (tmp_dir / \"metrics.yaml\").read_text().strip() == \"foo: 3\"\n\n\ndef test_apply_failed(tmp_dir, scm, dvc, failed_exp_stage, mocker):\n    from dvc.repo.experiments.queue.base import QueueDoneResult, QueueEntry\n\n    dvc.experiments.run(\n        failed_exp_stage.addressing, params=[\"foo=3\"], queue=True, name=\"foo\"\n    )\n    exp_rev = dvc.experiments.scm.resolve_rev(f\"{CELERY_STASH}@{{0}}\")\n\n    # patch iter_done to return exp_rev as a failed exp (None-type result)\n    queue = dvc.experiments.celery_queue\n    mocker.patch.object(\n        queue,\n        \"iter_done\",\n        return_value=[\n            QueueDoneResult(\n                QueueEntry(\"\", \"\", queue.ref, exp_rev, \"\", None, \"foo\", None),\n                None,\n            ),\n        ],\n    )\n    mocker.patch.object(queue, \"iter_queued\", return_value=[])\n\n    dvc.experiments.apply(exp_rev)\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 3\"\n\n    scm.reset(hard=True)\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 1\"\n    dvc.experiments.apply(\"foo\")\n    assert (tmp_dir / 
\"params.yaml\").read_text().strip() == \"foo: 3\"\n\n\ndef test_apply_queued(tmp_dir, scm, dvc, exp_stage):\n    metrics_original = (tmp_dir / \"metrics.yaml\").read_text().strip()\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=2\"], name=\"exp-a\", queue=True\n    )\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=3\"], name=\"exp-b\", queue=True\n    )\n    queue_revs = {\n        entry.name: entry.stash_rev\n        for entry in dvc.experiments.celery_queue.iter_queued()\n    }\n\n    dvc.experiments.apply(queue_revs[\"exp-a\"])\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 2\"\n    assert (tmp_dir / \"metrics.yaml\").read_text().strip() == metrics_original\n\n    dvc.experiments.apply(queue_revs[\"exp-b\"])\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 3\"\n    assert (tmp_dir / \"metrics.yaml\").read_text().strip() == metrics_original\n\n\ndef test_apply_untracked(tmp_dir, scm, dvc, exp_stage):\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp = first(results)\n    tmp_dir.gen(\"untracked\", \"untracked\")\n    tmp_dir.gen(\"params.yaml\", \"conflict\")\n\n    dvc.experiments.apply(exp)\n    assert (tmp_dir / \"untracked\").read_text() == \"untracked\"\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 2\"\n\n\ndef test_apply_unchanged_head(tmp_dir, scm, dvc, exp_stage):\n    # see https://github.com/treeverse/dvc/issues/8764\n    tmp_dir.gen(\"params.yaml\", \"foo: 2\")\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"params.yaml\", \"metrics.yaml\"])\n    scm.commit(\"commit foo=2\")\n    results = dvc.experiments.run(exp_stage.addressing)\n    # workspace now contains unchanged (git-committed) params.yaml w/foo: 2\n    exp = first(results)\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    # workspace now contains changed params.yaml w/foo: 3\n\n    dvc.experiments.apply(exp)\n    assert 
(tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 2\"\n"
  },
  {
    "path": "tests/func/experiments/test_diff.py",
    "content": "from funcy import first\n\n\ndef test_diff_empty(tmp_dir, scm, dvc, exp_stage):\n    assert dvc.experiments.diff() == {\"params\": {}, \"metrics\": {}}\n\n\ndef test_diff_head(tmp_dir, scm, dvc, exp_stage):\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp = first(results)\n\n    assert dvc.experiments.diff(a_rev=\"HEAD\", b_rev=exp) == {\n        \"params\": {\"params.yaml\": {\"foo\": {\"diff\": 1, \"old\": 1, \"new\": 2}}},\n        \"metrics\": {\"metrics.yaml\": {\"foo\": {\"diff\": 1, \"old\": 1, \"new\": 2}}},\n    }\n\n\ndef test_diff_exp(tmp_dir, scm, dvc, exp_stage):\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp_a = first(results)\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp_b = first(results)\n\n    assert dvc.experiments.diff(a_rev=exp_a, b_rev=exp_b) == {\n        \"params\": {\"params.yaml\": {\"foo\": {\"diff\": 1, \"old\": 2, \"new\": 3}}},\n        \"metrics\": {\"metrics.yaml\": {\"foo\": {\"diff\": 1, \"old\": 2, \"new\": 3}}},\n    }\n"
  },
  {
    "path": "tests/func/experiments/test_experiments.py",
    "content": "import itertools\nimport logging\nimport os\nimport stat\nfrom textwrap import dedent\n\nimport dulwich\nimport pytest\nfrom configobj import ConfigObj\nfrom funcy import first\n\nfrom dvc.dvcfile import PROJECT_FILE\nfrom dvc.env import (\n    DVC_EXP_BASELINE_REV,\n    DVC_EXP_NAME,\n    DVC_ROOT,\n    DVC_STUDIO_OFFLINE,\n    DVC_STUDIO_REPO_URL,\n    DVC_STUDIO_TOKEN,\n    DVC_STUDIO_URL,\n)\nfrom dvc.exceptions import DvcException, ReproductionError\nfrom dvc.repo import Repo\nfrom dvc.repo.experiments.exceptions import ExperimentExistsError\nfrom dvc.repo.experiments.queue.base import BaseStashQueue\nfrom dvc.repo.experiments.refs import CELERY_STASH\nfrom dvc.repo.experiments.utils import exp_refs_by_rev\nfrom dvc.scm import SCMError, resolve_rev\nfrom dvc.stage.exceptions import StageFileDoesNotExistError\nfrom dvc.testing.scripts import COPY_SCRIPT\nfrom dvc.utils.serialize import PythonFileCorruptedError\n\n\n@pytest.mark.parametrize(\"name\", [None, \"foo\"])\ndef test_new_simple(tmp_dir, scm, dvc, exp_stage, mocker, name, workspace):\n    baseline = scm.get_rev()\n    tmp_dir.gen(\"params.yaml\", \"foo: 2\")\n\n    new_mock = mocker.spy(dvc.experiments, \"new\")\n    results = dvc.experiments.run(\n        exp_stage.addressing, name=name, tmp_dir=not workspace\n    )\n    exp = first(results)\n    ref_info = first(exp_refs_by_rev(scm, exp))\n    assert ref_info\n    assert ref_info.baseline_sha == baseline\n\n    new_mock.assert_called_once()\n    fs = scm.get_fs(exp)\n    with fs.open(\"metrics.yaml\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert fobj.read().strip() == \"foo: 2\"\n\n    if workspace:\n        assert (tmp_dir / \"metrics.yaml\").read_text().strip() == \"foo: 2\"\n\n    exp_name = name if name else ref_info.name\n    assert dvc.experiments.get_exact_name([exp])[exp] == exp_name\n    assert resolve_rev(scm, exp_name) == exp\n\n\ndef test_experiment_exists(tmp_dir, scm, dvc, exp_stage, mocker, workspace):\n    
dvc.experiments.run(\n        exp_stage.addressing,\n        name=\"foo\",\n        params=[\"foo=2\"],\n        tmp_dir=not workspace,\n    )\n\n    new_mock = mocker.spy(BaseStashQueue, \"_stash_exp\")\n    with pytest.raises(ExperimentExistsError):\n        dvc.experiments.run(\n            exp_stage.addressing,\n            name=\"foo\",\n            params=[\"foo=3\"],\n            tmp_dir=not workspace,\n        )\n    new_mock.assert_not_called()\n\n    results = dvc.experiments.run(\n        exp_stage.addressing,\n        name=\"foo\",\n        params=[\"foo=3\"],\n        force=True,\n        tmp_dir=not workspace,\n    )\n    exp = first(results)\n\n    fs = scm.get_fs(exp)\n    with fs.open(\"metrics.yaml\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert fobj.read().strip() == \"foo: 3\"\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"Not supported for Windows.\")\ndef test_file_permissions(tmp_dir, scm, dvc, exp_stage, mocker):\n    mode = 0o755\n    os.chmod(tmp_dir / \"copy.py\", mode)\n    scm.add([\"copy.py\"])\n    scm.commit(\"set exec\")\n\n    tmp_dir.gen(\"params.yaml\", \"foo: 2\")\n    dvc.experiments.run(exp_stage.addressing)\n    assert stat.S_IMODE(os.stat(tmp_dir / \"copy.py\").st_mode) == mode\n\n\ndef test_failed_exp_workspace(tmp_dir, scm, dvc, failed_exp_stage, mocker, capsys):\n    tmp_dir.gen(\"params.yaml\", \"foo: 2\")\n    with pytest.raises(ReproductionError):\n        dvc.experiments.run(failed_exp_stage.addressing)\n    assert not dvc.fs.exists(\n        os.path.join(dvc.experiments.workspace_queue.pid_dir, \"workspace\")\n    )\n\n\ndef test_get_baseline(tmp_dir, scm, dvc, exp_stage):\n    init_rev = scm.get_rev()\n    assert dvc.experiments.get_baseline(init_rev) is None\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp_rev = first(results)\n    assert dvc.experiments.get_baseline(exp_rev) == init_rev\n\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"], 
queue=True)\n    assert dvc.experiments.get_baseline(f\"{CELERY_STASH}@{{0}}\") == init_rev\n\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"copy.py\", \"params.yaml\", \"metrics.yaml\"])\n    scm.commit(\"promote exp\")\n    promote_rev = scm.get_rev()\n    assert dvc.experiments.get_baseline(promote_rev) is None\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=4\"])\n    exp_rev = first(results)\n    assert dvc.experiments.get_baseline(exp_rev) == promote_rev\n\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=5\"], queue=True)\n    assert dvc.experiments.get_baseline(f\"{CELERY_STASH}@{{0}}\") == promote_rev\n    assert dvc.experiments.get_baseline(f\"{CELERY_STASH}@{{1}}\") == init_rev\n\n\ndef test_update_py_params(tmp_dir, scm, dvc, session_queue, copy_script):\n    tmp_dir.gen(\"params.py\", \"INT = 1\\n\")\n    stage = dvc.run(\n        cmd=\"python copy.py params.py metrics.py\",\n        metrics_no_cache=[\"metrics.py\"],\n        params=[\"params.py:INT\"],\n        name=\"copy-file\",\n    )\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"copy.py\", \"params.py\", \"metrics.py\"])\n    scm.commit(\"init\")\n\n    results = dvc.experiments.run(\n        stage.addressing, params=[\"params.py:INT=2\"], tmp_dir=True\n    )\n    exp_a = first(results)\n\n    fs = scm.get_fs(exp_a)\n    with fs.open(\"params.py\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert fobj.read().strip() == \"INT = 2\"\n    with fs.open(\"metrics.py\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert fobj.read().strip() == \"INT = 2\"\n\n    tmp_dir.gen(\n        \"params.py\",\n        (\n            \"INT = 1\\nFLOAT = 0.001\\nDICT = {'a': 1}\\n\\n\"\n            \"class Train:\\n    seed = 2020\\n\\n\"\n            \"class Klass:\\n    def __init__(self):\\n        self.a = 111\\n\"\n        ),\n    )\n    stage = dvc.run(\n        cmd=\"python copy.py params.py metrics.py\",\n        metrics_no_cache=[\"metrics.py\"],\n        
params=[\"params.py:INT,FLOAT,DICT,Train,Klass\"],\n        name=\"copy-file\",\n    )\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"copy.py\", \"params.py\", \"metrics.py\"])\n    scm.commit(\"init\")\n\n    results = dvc.experiments.run(\n        stage.addressing,\n        params=[\n            \"params.py:FLOAT=0.1\",\n            \"params.py:Train.seed=2121\",\n            \"params.py:Klass.a=222\",\n        ],\n        tmp_dir=True,\n    )\n    exp_a = first(results)\n\n    result = (\n        \"INT = 1\\nFLOAT = 0.1\\nDICT = {'a': 1}\\n\\n\"\n        \"class Train:\\n    seed = 2121\\n\\n\"\n        \"class Klass:\\n    def __init__(self):\\n        self.a = 222\"\n    )\n\n    def _dos2unix(text):\n        if os.name != \"nt\":\n            return text\n\n        # NOTE: git on windows will use CRLF, so we have to convert it to LF\n        # in order to compare with the original\n        return text.replace(\"\\r\\n\", \"\\n\")\n\n    fs = scm.get_fs(exp_a)\n    with fs.open(\"params.py\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert _dos2unix(fobj.read().strip()) == result\n    with fs.open(\"metrics.py\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert _dos2unix(fobj.read().strip()) == result\n\n    tmp_dir.gen(\"params.py\", \"INT = 1\\n\")\n    stage = dvc.run(\n        cmd=\"python copy.py params.py metrics.py\",\n        metrics_no_cache=[\"metrics.py\"],\n        params=[\"params.py:INT\"],\n        name=\"copy-file\",\n    )\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"copy.py\", \"params.py\", \"metrics.py\"])\n    scm.commit(\"init\")\n\n    with pytest.raises(PythonFileCorruptedError):\n        dvc.experiments.run(stage.addressing, params=[\"params.py:INT=2a\"], tmp_dir=True)\n\n\ndef test_detached_parent(tmp_dir, scm, dvc, exp_stage, mocker):\n    detached_rev = scm.get_rev()\n\n    tmp_dir.gen(\"params.yaml\", \"foo: 2\")\n    dvc.reproduce(exp_stage.addressing)\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"copy.py\", 
\"params.yaml\", \"metrics.yaml\"])\n    scm.commit(\"v2\")\n\n    scm.checkout(detached_rev)\n    assert scm.pygit2.repo.head_is_detached\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n\n    exp_rev = first(results)\n    assert dvc.experiments.get_baseline(exp_rev) == detached_rev\n    assert (tmp_dir / \"params.yaml\").read_text().strip() == \"foo: 3\"\n\n\ndef test_branch(tmp_dir, scm, dvc, exp_stage):\n    from dvc.exceptions import InvalidArgumentError\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.branch(\"foo\", \"branch\")\n\n    scm.branch(\"branch-exists\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], name=\"foo\")\n    exp_a = first(results)\n    ref_a = dvc.experiments.get_branch_by_rev(exp_a)\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.branch(\"foo\", \"branch-exists\")\n    dvc.experiments.branch(\"foo\")\n    dvc.experiments.branch(\"foo\", \"branch-name\")\n    dvc.experiments.branch(exp_a, \"branch-rev\")\n    dvc.experiments.branch(ref_a, \"branch-ref\")\n\n    for name in [\"foo-branch\", \"branch-name\", \"branch-rev\", \"branch-ref\"]:\n        assert name in scm.list_branches()\n        assert scm.resolve_rev(name) == exp_a\n\n    tmp_dir.scm_gen({\"new_file\": \"new_file\"}, commit=\"new baseline\")\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], name=\"foo\")\n    exp_b = first(results)\n    ref_b = dvc.experiments.get_branch_by_rev(exp_b)\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.branch(\"foo\", \"branch-name\")\n    dvc.experiments.branch(ref_b, \"branch-ref-b\")\n\n    assert \"branch-ref-b\" in scm.list_branches()\n    assert scm.resolve_rev(\"branch-ref-b\") == exp_b\n\n\ndef test_no_scm(tmp_dir):\n    from dvc.repo import Repo as DvcRepo\n    from dvc.scm import NoSCMError\n\n    dvc = DvcRepo.init(no_scm=True)\n\n    for cmd in [\n        \"apply\",\n     
   \"branch\",\n        \"diff\",\n        \"show\",\n        \"run\",\n        \"gc\",\n        \"push\",\n        \"pull\",\n        \"ls\",\n    ]:\n        with pytest.raises(NoSCMError):\n            getattr(dvc.experiments, cmd)()\n\n\ndef test_untracked(tmp_dir, scm, dvc, caplog, workspace, copy_script):\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: 1\", commit=\"track params\")\n    stage = dvc.run(\n        cmd=\"python copy.py params.yaml metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\"],\n        deps=[\"copy.py\"],\n        name=\"copy-file\",\n        no_exec=True,\n    )\n\n    # copy.py is untracked\n    # with caplog.at_level(logging.ERROR):\n    #     results = dvc.experiments.run(\n    #         stage.addressing, params=[\"foo=2\"], tmp_dir=True\n    #     )\n    #     assert \"Failed to reproduce experiment\" in caplog.text\n    #     assert not results\n\n    # dvc.yaml, copy.py are staged as new file but not committed\n    scm.add([\"dvc.yaml\", \"copy.py\"])\n    results = dvc.experiments.run(\n        stage.addressing, params=[\"foo=2\"], tmp_dir=not workspace\n    )\n    exp = first(results)\n    fs = scm.get_fs(exp)\n    assert fs.exists(\"dvc.yaml\")\n    assert fs.exists(\"dvc.lock\")\n    assert fs.exists(\"copy.py\")\n    with fs.open(\"metrics.yaml\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert fobj.read().strip() == \"foo: 2\"\n\n\ndef test_packed_args_exists(tmp_dir, scm, dvc, exp_stage, caplog):\n    from dvc.repo.experiments.executor.base import BaseExecutor\n\n    tmp_dir.scm_gen(\n        tmp_dir / \".dvc\" / \"tmp\" / BaseExecutor.PACKED_ARGS_FILE,\n        \"\",\n        commit=\"commit args file\",\n        force=True,\n    )\n\n    with caplog.at_level(logging.WARNING):\n        dvc.experiments.run(exp_stage.addressing)\n        assert \"Temporary DVC file\" in caplog.text\n    assert not (tmp_dir / \".dvc\" / \"tmp\" / BaseExecutor.PACKED_ARGS_FILE).exists()\n\n\ndef 
test_list(tmp_dir, scm, dvc, exp_stage):\n    baseline_old = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp_a = first(results)\n    ref_info_a = first(exp_refs_by_rev(scm, exp_a))\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp_b = first(results)\n    ref_info_b = first(exp_refs_by_rev(scm, exp_b))\n\n    tmp_dir.scm_gen(\"new\", \"new\", commit=\"new\")\n    baseline_new = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=4\"])\n    exp_c = first(results)\n    ref_info_c = first(exp_refs_by_rev(scm, exp_c))\n\n    assert dvc.experiments.ls() == {baseline_new: [(ref_info_c.name, exp_c)]}\n\n    exp_list = dvc.experiments.ls(rev=ref_info_a.baseline_sha)\n    assert {key: set(val) for key, val in exp_list.items()} == {\n        baseline_old: {(ref_info_a.name, exp_a), (ref_info_b.name, exp_b)}\n    }\n\n    exp_list = dvc.experiments.ls(rev=[baseline_old, baseline_new])\n    assert {key: set(val) for key, val in exp_list.items()} == {\n        baseline_old: {(ref_info_a.name, exp_a), (ref_info_b.name, exp_b)},\n        baseline_new: {(ref_info_c.name, exp_c)},\n    }\n\n    exp_list = dvc.experiments.ls(all_commits=True)\n    assert {key: set(val) for key, val in exp_list.items()} == {\n        baseline_old: {(ref_info_a.name, exp_a), (ref_info_b.name, exp_b)},\n        baseline_new: {(ref_info_c.name, exp_c)},\n    }\n\n    scm.checkout(\"branch\", True)\n    exp_list = dvc.experiments.ls(all_commits=True)\n    assert {key: set(val) for key, val in exp_list.items()} == {\n        baseline_old: {(ref_info_a.name, exp_a), (ref_info_b.name, exp_b)},\n        baseline_new: {(ref_info_c.name, exp_c)},\n    }\n\n\ndef test_subdir(tmp_dir, scm, dvc, workspace):\n    subdir = tmp_dir / \"dir\"\n    subdir.gen(\"copy.py\", COPY_SCRIPT)\n    subdir.gen(\"params.yaml\", \"foo: 1\")\n\n    with subdir.chdir():\n        dvc.run(\n            
cmd=\"python copy.py params.yaml metrics.yaml\",\n            metrics_no_cache=[\"metrics.yaml\"],\n            params=[\"foo\"],\n            name=\"copy-file\",\n            no_exec=True,\n        )\n        scm.add([subdir / \"dvc.yaml\", subdir / \"copy.py\", subdir / \"params.yaml\"])\n        scm.commit(\"init\")\n\n        results = dvc.experiments.run(\n            PROJECT_FILE, params=[\"foo=2\"], tmp_dir=not workspace\n        )\n        assert results\n\n    exp = first(results)\n    ref_info = first(exp_refs_by_rev(scm, exp))\n\n    fs = scm.get_fs(exp)\n    for fname in [\"metrics.yaml\", \"dvc.lock\"]:\n        assert fs.exists(f\"dir/{fname}\")\n    with fs.open(\"dir/metrics.yaml\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert fobj.read().strip() == \"foo: 2\"\n\n    assert dvc.experiments.get_exact_name([exp])[exp] == ref_info.name\n    assert resolve_rev(scm, ref_info.name) == exp\n\n\ndef test_subrepo(tmp_dir, request, scm, workspace):\n    from dvc.testing.tmp_dir import make_subrepo\n\n    subrepo = tmp_dir / \"dir\" / \"repo\"\n    make_subrepo(subrepo, scm)\n    request.addfinalizer(subrepo.dvc.close)\n\n    subrepo.gen(\"copy.py\", COPY_SCRIPT)\n    subrepo.gen(\"params.yaml\", \"foo: 1\")\n\n    with subrepo.chdir():\n        subrepo.dvc.run(\n            cmd=\"python copy.py params.yaml metrics.yaml\",\n            metrics_no_cache=[\"metrics.yaml\"],\n            params=[\"foo\"],\n            name=\"copy-file\",\n            no_exec=True,\n        )\n        scm.add([subrepo / \"dvc.yaml\", subrepo / \"copy.py\", subrepo / \"params.yaml\"])\n        scm.commit(\"init\")\n\n        results = subrepo.dvc.experiments.run(\n            PROJECT_FILE, params=[\"foo=2\"], tmp_dir=not workspace\n        )\n        assert results\n\n    exp = first(results)\n    ref_info = first(exp_refs_by_rev(scm, exp))\n\n    fs = scm.get_fs(exp)\n    for fname in [\"metrics.yaml\", \"dvc.lock\"]:\n        assert fs.exists(f\"dir/repo/{fname}\")\n  
  with fs.open(\"dir/repo/metrics.yaml\", mode=\"r\", encoding=\"utf-8\") as fobj:\n        assert fobj.read().strip() == \"foo: 2\"\n\n    assert subrepo.dvc.experiments.get_exact_name([exp])[exp] == ref_info.name\n    assert resolve_rev(scm, ref_info.name) == exp\n\n\ndef test_run_celery(tmp_dir, scm, dvc, exp_stage, mocker):\n    \"\"\"Test running with full (non-pytest-celery) dvc-task queue.\"\"\"\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], queue=True)\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"], queue=True)\n    assert len(dvc.experiments.stash_revs) == 2\n\n    repro_spy = mocker.spy(dvc.experiments, \"reproduce_celery\")\n    results = dvc.experiments.run(run_all=True)\n    assert len(results) == 2\n    repro_spy.assert_called_once_with(jobs=1)\n\n    expected = {\"foo: 2\", \"foo: 3\"}\n    metrics = set()\n    for exp in results:\n        fs = scm.get_fs(exp)\n        with fs.open(\"metrics.yaml\", mode=\"r\", encoding=\"utf-8\") as fobj:\n            metrics.add(fobj.read().strip())\n    assert expected == metrics\n\n\ndef test_checkout_targets_deps(tmp_dir, scm, dvc, exp_stage):\n    from dvc.utils.fs import remove\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"}, commit=\"add files\")\n    stage = dvc.stage.add(\n        cmd=\"python copy.py params.yaml metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\", \"foo\"],\n        force=True,\n    )\n    remove(\"foo\")\n    remove(\"bar\")\n\n    dvc.experiments.run(stage.addressing, params=[\"foo=2\"])\n    assert (tmp_dir / \"foo\").exists()\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n    assert not (tmp_dir / \"bar\").exists()\n\n\n@pytest.mark.parametrize(\"tail\", [\"\", \"~1\", \"^\"])\ndef test_fix_exp_head(tmp_dir, scm, tail):\n    from dvc.repo.experiments.refs import EXEC_BASELINE\n    from dvc.repo.experiments.utils import fix_exp_head\n\n 
   head = \"HEAD\" + tail\n    assert head == fix_exp_head(scm, head)\n\n    rev = \"1\" * 40\n    scm.set_ref(EXEC_BASELINE, rev)\n    assert EXEC_BASELINE + tail == fix_exp_head(scm, head)\n    assert \"foo\" + tail == fix_exp_head(scm, \"foo\" + tail)\n\n\n@pytest.mark.parametrize(\n    \"params, target\",\n    itertools.product((\"foo: 1\", \"foo: 2\"), (True, False)),\n)\ndef test_modified_data_dep(tmp_dir, scm, dvc, workspace, params, target, copy_script):\n    tmp_dir.dvc_gen(\"data\", \"data\")\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\")\n    exp_stage = dvc.run(\n        cmd=\"python copy.py params.yaml metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\", \"data\"],\n    )\n    scm.add(\n        [\n            \"dvc.yaml\",\n            \"dvc.lock\",\n            \"copy.py\",\n            \"params.yaml\",\n            \"metrics.yaml\",\n            \"data.dvc\",\n            \".gitignore\",\n        ]\n    )\n    scm.commit(\"init\")\n\n    tmp_dir.gen(\"params.yaml\", params)\n    tmp_dir.gen(\"data\", \"modified\")\n\n    results = dvc.experiments.run(\n        exp_stage.addressing if target else None, tmp_dir=not workspace\n    )\n    exp = first(results)\n\n    for rev in dvc.brancher(revs=[exp]):\n        if rev != exp:\n            continue\n        with dvc.dvcfs.open(\"metrics.yaml\") as fobj:\n            assert fobj.read().strip() == params\n        with dvc.dvcfs.open(\"data\") as fobj:\n            assert fobj.read().strip() == \"modified\"\n\n    if workspace:\n        assert (tmp_dir / \"metrics.yaml\").read_text().strip() == params\n        assert (tmp_dir / \"data\").read_text().strip() == \"modified\"\n\n\ndef test_exp_run_recursive(tmp_dir, scm, dvc, run_copy_metrics):\n    tmp_dir.dvc_gen(\"metric_t.json\", '{\"foo\": 1}')\n    run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        metrics=[\"metric.json\"],\n        
no_exec=True,\n        name=\"copy-metric\",\n    )\n    assert dvc.experiments.run(\".\", recursive=True)\n    assert (tmp_dir / \"metric.json\").parse() == {\"foo\": 1}\n\n\ndef test_experiment_name_invalid(tmp_dir, scm, dvc, exp_stage, mocker):\n    from dvc.exceptions import InvalidArgumentError\n\n    new_mock = mocker.spy(BaseStashQueue, \"_stash_exp\")\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.run(exp_stage.addressing, name=\"fo^o\", params=[\"foo=3\"])\n    new_mock.assert_not_called()\n\n\ndef test_experiments_workspace_not_log_exception(caplog, dvc, scm):\n    \"\"\"Experiments run in workspace should not log exception.\n\n    Instead it should just leave it to be handled in the main entrypoints.\n    \"\"\"\n    with caplog.at_level(logging.ERROR):\n        with pytest.raises(StageFileDoesNotExistError):\n            dvc.experiments.run()\n\n    assert not caplog.text\n\n\n@pytest.mark.vscode\ndef test_run_env(tmp_dir, dvc, scm, mocker):\n    dump_run_env = dedent(\n        \"\"\"\\\n        import os\n        from dvc.env import (\n            DVC_EXP_BASELINE_REV,\n            DVC_EXP_NAME,\n            DVC_ROOT,\n            DVC_STUDIO_OFFLINE,\n            DVC_STUDIO_REPO_URL,\n            DVC_STUDIO_TOKEN,\n            DVC_STUDIO_URL\n        )\n        for v in (\n            DVC_EXP_BASELINE_REV,\n            DVC_EXP_NAME,\n            DVC_ROOT,\n            DVC_STUDIO_OFFLINE,\n            DVC_STUDIO_REPO_URL,\n            DVC_STUDIO_TOKEN,\n            DVC_STUDIO_URL\n        ):\n            with open(v, \"w\") as f:\n                f.write(os.environ.get(v, \"\"))\n        \"\"\"\n    )\n    mocker.patch(\n        \"dvc.repo.experiments.queue.base.get_studio_config\",\n        return_value={\n            \"token\": \"TOKEN\",\n            \"repo_url\": \"REPO_URL\",\n            \"url\": \"BASE_URL\",\n            \"offline\": \"false\",\n        },\n    )\n    (tmp_dir / 
\"dump_run_env.py\").write_text(dump_run_env)\n    baseline = scm.get_rev()\n    dvc.stage.add(cmd=\"python dump_run_env.py\", name=\"run_env\")\n    dvc.experiments.run()\n    assert (tmp_dir / DVC_EXP_BASELINE_REV).read_text().strip() == baseline\n    assert (tmp_dir / DVC_EXP_NAME).read_text().strip()\n    assert (tmp_dir / DVC_ROOT).read_text().strip() == dvc.root_dir\n    assert (tmp_dir / DVC_STUDIO_TOKEN).read_text().strip() == \"TOKEN\"\n    assert (tmp_dir / DVC_STUDIO_REPO_URL).read_text().strip() == \"REPO_URL\"\n    assert (tmp_dir / DVC_STUDIO_URL).read_text().strip() == \"BASE_URL\"\n    assert (tmp_dir / DVC_STUDIO_OFFLINE).read_text().strip() == \"false\"\n\n    dvc.experiments.run(name=\"foo\")\n    assert (tmp_dir / DVC_EXP_BASELINE_REV).read_text().strip() == baseline\n    assert (tmp_dir / DVC_EXP_NAME).read_text().strip() == \"foo\"\n\n\ndef test_experiment_unchanged(tmp_dir, scm, dvc, exp_stage):\n    dvc.experiments.run(exp_stage.addressing)\n    dvc.experiments.run(exp_stage.addressing)\n\n    assert len(dvc.experiments.ls()[scm.get_rev()]) == 2\n\n\ndef test_experiment_run_dry(tmp_dir, scm, dvc, exp_stage, mocker):\n    repro = mocker.spy(dvc.experiments, \"reproduce_one\")\n    dvc.experiments.run(exp_stage.addressing, dry=True)\n\n    assert len(dvc.experiments.ls()[\"master\"]) == 0\n    assert repro.call_args.kwargs[\"tmp_dir\"] is True\n\n\ndef test_clean(tmp_dir, scm, dvc, mocker):\n    clean = mocker.spy(dvc.experiments.celery_queue.celery, \"clean\")\n    dvc.experiments.clean()\n    clean.assert_called_once_with()\n\n\ndef test_experiment_no_commit(tmp_dir):\n    from scmrepo.git import Git\n\n    from dvc.repo import Repo\n\n    Git.init(tmp_dir.fs_path).close()\n\n    repo = Repo.init()\n    assert repo.scm.no_commits\n\n    try:\n        with pytest.raises(SCMError):  # noqa: PT011\n            repo.experiments.ls()\n    finally:\n        repo.close()\n\n\ndef test_local_config_is_propagated_to_tmp(tmp_dir, scm, dvc):\n    with 
dvc.config.edit(\"local\") as conf:\n        conf[\"cache\"][\"type\"] = \"hardlink\"\n\n    stage = dvc.stage.add(\n        cmd=\"cat .dvc/config.local > file\", name=\"foo\", outs_no_cache=[\"file\"]\n    )\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    results = dvc.experiments.run(stage.addressing, tmp_dir=True)\n    exp = first(results)\n    fs = scm.get_fs(exp)\n\n    with fs.open(\"file\") as fobj:\n        conf_obj = ConfigObj(fobj)\n        assert conf_obj[\"cache\"][\"type\"] == \"hardlink\"\n\n\n@pytest.mark.parametrize(\"tmp\", [True, False])\ndef test_untracked_top_level_files_are_included_in_exp(tmp_dir, scm, dvc, tmp):\n    (tmp_dir / \"dvc.yaml\").dump(\n        {\"metrics\": [\"metrics.json\"], \"params\": [\"params.yaml\"], \"plots\": [\"plots.csv\"]}\n    )\n    stage = dvc.stage.add(\n        cmd=\"touch metrics.json && touch params.yaml && touch plots.csv\",\n        name=\"top-level\",\n    )\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n    results = dvc.experiments.run(stage.addressing, tmp_dir=tmp)\n    exp = first(results)\n    fs = scm.get_fs(exp)\n    for file in [\"metrics.json\", \"params.yaml\", \"plots.csv\"]:\n        assert fs.exists(file)\n\n\n@pytest.mark.parametrize(\"tmp\", [True, False])\ndef test_copy_paths(tmp_dir, scm, dvc, tmp):\n    stage = dvc.stage.add(cmd=\"cat file && ls dir\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    (tmp_dir / \"dir\").mkdir()\n    (tmp_dir / \"dir\" / \"file\").write_text(\"dir/file\")\n    scm.ignore(tmp_dir / \"dir\")\n    (tmp_dir / \"file\").write_text(\"file\")\n    scm.ignore(tmp_dir / \"file\")\n\n    results = dvc.experiments.run(\n        stage.addressing, tmp_dir=tmp, copy_paths=[\"dir\", \"file\"]\n    )\n    exp = first(results)\n    fs = scm.get_fs(exp)\n    assert not fs.exists(\"dir\")\n    assert not fs.exists(\"file\")\n\n\ndef test_copy_paths_errors(tmp_dir, scm, dvc, mocker):\n    stage = 
dvc.stage.add(cmd=\"echo foo\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    with pytest.raises(DvcException, match=\"Unable to copy\"):\n        dvc.experiments.run(stage.addressing, tmp_dir=True, copy_paths=[\"foo\"])\n\n    (tmp_dir / \"foo\").write_text(\"foo\")\n    mocker.patch(\"shutil.copy\", side_effect=OSError)\n\n    with pytest.raises(DvcException, match=\"Unable to copy\"):\n        dvc.experiments.run(stage.addressing, tmp_dir=True, copy_paths=[\"foo\"])\n\n\ndef test_mixed_git_dvc_out(tmp_dir, scm, dvc, exp_stage):\n    (tmp_dir / \"dir\").mkdir()\n    dir_metrics = os.path.join(\"dir\", \"metrics.yaml\")\n    dvc.stage.add(\n        cmd=f\"python copy.py params.yaml {dir_metrics}\",\n        metrics=[dir_metrics],\n        params=[\"foo\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\"],\n        force=True,\n    )\n    dvc.stage.add(\n        cmd=f\"python copy.py {dir_metrics} metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        name=\"copy-dir-file\",\n        deps=[\"dir\"],\n    )\n    scm.add([\"dvc.yaml\", \"dvc.lock\"])\n    scm.commit(\"add dir stage\")\n\n    exp = first(dvc.experiments.run())\n    assert (tmp_dir / \"dir\" / \"metrics.yaml\").exists()\n    git_fs = scm.get_fs(exp)\n    assert not git_fs.exists(\"dir/metrics.yaml\")\n\n\n@pytest.mark.parametrize(\"tmp\", [True, False])\ndef test_custom_commit_message(tmp_dir, scm, dvc, tmp):\n    stage = dvc.stage.add(cmd=\"echo foo\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    exp = first(\n        dvc.experiments.run(\n            stage.addressing, tmp_dir=tmp, message=\"custom commit message\"\n        )\n    )\n    assert scm.resolve_commit(exp).message == \"custom commit message\"\n\n\n@pytest.mark.parametrize(\"dep\", [\"submodule\", \"submodule/file\"])\ndef test_experiments_run_with_submodule_dependencies(dvc, scm, make_tmp_dir, dep):\n    external_repo = 
make_tmp_dir(\"external_repo\", scm=True)\n    external_repo.scm_gen(\"file\", \"content\", commit=\"add file\")\n\n    submodules = scm.pygit2.repo.submodules\n    submodules.add(os.fspath(external_repo), \"submodule\")\n    submodules.update(init=True)\n    scm.add_commit([\".gitmodules\"], message=\"add submodule\")\n\n    dvc.stage.add(cmd=\"echo foo\", deps=[dep], name=\"foo\")\n\n    assert dvc.experiments.run()\n\n\n@pytest.mark.skipif(dulwich.__version__ < (0, 24, 2), reason=\"requires dulwich>=0.24.2\")\ndef test_experiments_run_in_linked_git_worktree(\n    dvc, scm, tmp_path_factory: pytest.TempPathFactory, monkeypatch\n):\n    from dulwich.worktree import add_worktree\n\n    wt = tmp_path_factory.mktemp(\"worktrees\") / \"worktree\"\n    add_worktree(scm.dulwich.repo, wt, branch=\"wt-main\")\n\n    monkeypatch.chdir(wt)\n\n    wt_dvc = Repo(os.fspath(wt))\n    (wt / \"foo\").write_bytes(b\"foo\")\n    wt_dvc.stage.add(cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"], name=\"cp\")\n\n    results = wt_dvc.experiments.run(name=\"my-exp\")\n    assert results\n    rev = first(results)\n    assert rev\n    # If `bar` exists, we know that the stage was run.\n    assert (wt / \"bar\").read_bytes() == b\"foo\"\n"
  },
  {
    "path": "tests/func/experiments/test_queue.py",
    "content": "import pytest\nfrom funcy import first\n\n\ndef to_dict(tasks):\n    status_dict = {}\n    for task in tasks:\n        status_dict[task[\"name\"]] = task[\"status\"]\n    return status_dict\n\n\n@pytest.mark.parametrize(\"follow\", [True, False])\ndef test_celery_logs(tmp_dir, scm, dvc, failed_exp_stage, follow, capsys, test_queue):\n    celery_queue = dvc.experiments.celery_queue\n    dvc.experiments.run(failed_exp_stage.addressing, queue=True, name=\"foo\")\n    dvc.experiments.run(run_all=True)\n    test_queue.wait([\"foo\"])\n\n    done_result = first(celery_queue.iter_done())\n\n    name = done_result.entry.stash_rev\n    captured = capsys.readouterr()\n    celery_queue.logs(name, follow=follow)\n    captured = capsys.readouterr()\n    assert \"failed to reproduce 'failed-copy-file'\" in captured.out\n\n\ndef test_queue_doesnt_remove_untracked_params_file(tmp_dir, dvc, scm):\n    \"\"\"Regression test for https://github.com/treeverse/dvc/issues/7842\"\"\"\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\")\n    stage = dvc.run(cmd=\"echo ${foo}\", params=[\"foo\"], name=\"echo-foo\")\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \".gitignore\"])\n    scm.commit(\"init\")\n    dvc.experiments.run(stage.addressing, params=[\"foo=2\"], queue=True)\n    assert (tmp_dir / \"params.yaml\").exists()\n\n\ndef test_copy_paths_queue(tmp_dir, scm, dvc):\n    stage = dvc.stage.add(cmd=\"cat file && ls dir\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    (tmp_dir / \"dir\").mkdir()\n    (tmp_dir / \"dir\" / \"file\").write_text(\"dir/file\")\n    scm.ignore(tmp_dir / \"dir\")\n    (tmp_dir / \"file\").write_text(\"file\")\n    scm.ignore(tmp_dir / \"file\")\n\n    dvc.experiments.run(stage.addressing, queue=True)\n    results = dvc.experiments.run(run_all=True)\n\n    exp = first(results)\n    fs = scm.get_fs(exp)\n    assert not fs.exists(\"dir\")\n    assert not fs.exists(\"file\")\n\n\ndef 
test_custom_commit_message_queue(tmp_dir, scm, dvc):\n    stage = dvc.stage.add(cmd=\"echo foo\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    dvc.experiments.run(stage.addressing, queue=True, message=\"custom commit message\")\n\n    exp = first(dvc.experiments.run(run_all=True))\n    assert scm.resolve_commit(exp).message == \"custom commit message\"\n"
  },
  {
    "path": "tests/func/experiments/test_remote.py",
    "content": "import logging\n\nimport pytest\nfrom dulwich.porcelain import pull as git_pull\nfrom funcy import first\n\nfrom dvc.repo.experiments.utils import exp_refs_by_rev\n\n\n@pytest.mark.parametrize(\"use_url\", [True, False])\ndef test_push(tmp_dir, scm, dvc, git_upstream, exp_stage, use_url):\n    from dvc.exceptions import InvalidArgumentError\n\n    remote = git_upstream.url if use_url else git_upstream.remote\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.push(remote, [\"foo\"])\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    exp1 = first(results)\n    ref_info1 = first(exp_refs_by_rev(scm, exp1))\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp2 = first(results)\n    ref_info2 = first(exp_refs_by_rev(scm, exp2))\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp3 = first(results)\n    ref_info3 = first(exp_refs_by_rev(scm, exp3))\n\n    dvc.experiments.push(remote, [ref_info1.name, ref_info2.name])\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info2)) == exp2\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info3)) is None\n\n    git_upstream.tmp_dir.scm.remove_ref(str(ref_info1))\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info1)) is None\n\n    dvc.experiments.push(remote, [ref_info1.name])\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n\n    dvc.experiments.push(remote)\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info3)) == exp3\n\n\n@pytest.mark.parametrize(\"all_,rev,result3\", [(True, False, True), (False, True, None)])\ndef test_push_args(tmp_dir, scm, dvc, git_upstream, exp_stage, all_, rev, result3):\n    remote = git_upstream.url\n    baseline = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    exp1 = first(results)\n    ref_info1 = 
first(exp_refs_by_rev(scm, exp1))\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp2 = first(results)\n    ref_info2 = first(exp_refs_by_rev(scm, exp2))\n\n    scm.commit(\"new_baseline\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp3 = first(results)\n    ref_info3 = first(exp_refs_by_rev(scm, exp3))\n\n    if rev:\n        rev = baseline\n    dvc.experiments.push(remote, [], all_commits=all_, rev=rev)\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info2)) == exp2\n    if result3:\n        result3 = exp3\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info3)) == result3\n\n\ndef test_push_multi_rev(tmp_dir, scm, dvc, git_upstream, exp_stage):\n    remote = git_upstream.url\n    baseline = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    exp1 = first(results)\n    ref_info1 = first(exp_refs_by_rev(scm, exp1))\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp2 = first(results)\n    ref_info2 = first(exp_refs_by_rev(scm, exp2))\n\n    scm.commit(\"new_baseline\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp3 = first(results)\n    ref_info3 = first(exp_refs_by_rev(scm, exp3))\n\n    dvc.experiments.push(remote, [], rev=[baseline, scm.get_rev()])\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info2)) == exp2\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info3)) == exp3\n\n\ndef test_push_diverged(tmp_dir, scm, dvc, git_upstream, exp_stage):\n    git_upstream.tmp_dir.scm_gen(\"foo\", \"foo\", commit=\"init\")\n    remote_rev = git_upstream.tmp_dir.scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp = first(results)\n    ref_info = first(exp_refs_by_rev(scm, 
exp))\n\n    git_upstream.tmp_dir.scm.set_ref(str(ref_info), remote_rev)\n\n    assert dvc.experiments.push(git_upstream.remote, [ref_info.name]) == {\n        \"diverged\": [ref_info.name],\n        \"url\": None,\n        \"uploaded\": 0,\n    }\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info)) == remote_rev\n\n    dvc.experiments.push(git_upstream.remote, [ref_info.name], force=True)\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info)) == exp\n\n\ndef test_push_ambiguous_name(tmp_dir, scm, dvc, git_upstream, exp_stage):\n    from dvc.exceptions import InvalidArgumentError\n\n    remote = git_upstream.remote\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], name=\"foo\")\n    exp_a = first(results)\n    ref_info_a = first(exp_refs_by_rev(scm, exp_a))\n\n    tmp_dir.scm_gen(\"new\", \"new\", commit=\"new\")\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"], name=\"foo\")\n    exp_b = first(results)\n    ref_info_b = first(exp_refs_by_rev(scm, exp_b))\n\n    dvc.experiments.push(remote, [\"foo\"])\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info_b)) == exp_b\n\n    tmp_dir.scm_gen(\"new\", \"new 2\", commit=\"new 2\")\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.push(remote, [\"foo\"])\n\n    dvc.experiments.push(remote, [str(ref_info_a)])\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info_a)) == exp_a\n\n\n@pytest.mark.parametrize(\"use_url\", [True, False])\ndef test_list_remote(tmp_dir, scm, dvc, git_downstream, exp_stage, use_url):\n    baseline_old = scm.get_rev()\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp_a = first(results)\n    ref_info_a = first(exp_refs_by_rev(scm, exp_a))\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp_b = first(results)\n    ref_info_b = first(exp_refs_by_rev(scm, exp_b))\n\n    tmp_dir.scm_gen(\"new\", \"new\", commit=\"new\")\n    
baseline_new = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=4\"])\n    exp_c = first(results)\n    ref_info_c = first(exp_refs_by_rev(scm, exp_c))\n\n    remote = git_downstream.url if use_url else git_downstream.remote\n\n    assert git_downstream.tmp_dir.scm.get_ref(\"HEAD\") != scm.get_ref(\"HEAD\")\n    downstream_exp = git_downstream.tmp_dir.dvc.experiments\n    assert downstream_exp.ls(git_remote=remote) == {}\n\n    git_downstream.tmp_dir.scm.fetch_refspecs(remote, [\"master:master\"])\n    exp_list = downstream_exp.ls(rev=baseline_old, git_remote=remote)\n    assert {key: set(val) for key, val in exp_list.items()} == {\n        baseline_old: {(ref_info_a.name, None), (ref_info_b.name, None)}\n    }\n\n    exp_list = downstream_exp.ls(all_commits=True, git_remote=remote)\n    assert {key: set(val) for key, val in exp_list.items()} == {\n        baseline_old: {(ref_info_a.name, None), (ref_info_b.name, None)},\n        baseline_new: {(ref_info_c.name, None)},\n    }\n\n\n@pytest.mark.parametrize(\"use_url\", [True, False])\ndef test_pull(tmp_dir, scm, dvc, git_downstream, exp_stage, use_url):\n    from dvc.exceptions import InvalidArgumentError\n\n    # pull to downstream so both repos start from same commit\n    git_pull(git_downstream.tmp_dir, \"upstream\")\n\n    remote = git_downstream.url if use_url else git_downstream.remote\n    downstream_exp = git_downstream.tmp_dir.dvc.experiments\n    with pytest.raises(InvalidArgumentError):\n        downstream_exp.pull(remote, [\"foo\"])\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    exp1 = first(results)\n    ref_info1 = first(exp_refs_by_rev(scm, exp1))\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp2 = first(results)\n    ref_info2 = first(exp_refs_by_rev(scm, exp2))\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp3 = first(results)\n    ref_info3 = 
first(exp_refs_by_rev(scm, exp3))\n\n    downstream_exp.pull(\n        git_downstream.remote, [ref_info1.name, ref_info2.name], force=True\n    )\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info2)) == exp2\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info3)) is None\n\n    git_downstream.tmp_dir.scm.remove_ref(str(ref_info1))\n\n    downstream_exp.pull(remote, [str(ref_info1)])\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n\n    downstream_exp.pull(remote)\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info3)) == exp3\n\n\n@pytest.mark.parametrize(\"all_,rev,result3\", [(True, False, True), (False, True, None)])\ndef test_pull_args(tmp_dir, scm, dvc, git_downstream, exp_stage, all_, rev, result3):\n    baseline = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    exp1 = first(results)\n    ref_info1 = first(exp_refs_by_rev(scm, exp1))\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp2 = first(results)\n    ref_info2 = first(exp_refs_by_rev(scm, exp2))\n\n    scm.commit(\"new_baseline\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp3 = first(results)\n    ref_info3 = first(exp_refs_by_rev(scm, exp3))\n\n    if rev:\n        rev = baseline\n\n    downstream_exp = git_downstream.tmp_dir.dvc.experiments\n    git_downstream.tmp_dir.scm.fetch_refspecs(str(tmp_dir), [\"master:master\"])\n    downstream_exp.pull(git_downstream.remote, [], all_commits=all_, rev=rev)\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info2)) == exp2\n    if result3:\n        result3 = exp3\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info3)) == result3\n\n\ndef test_pull_multi_rev(tmp_dir, scm, dvc, git_downstream, exp_stage):\n    baseline = scm.get_rev()\n\n   
 results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    exp1 = first(results)\n    ref_info1 = first(exp_refs_by_rev(scm, exp1))\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp2 = first(results)\n    ref_info2 = first(exp_refs_by_rev(scm, exp2))\n\n    scm.commit(\"new_baseline\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp3 = first(results)\n    ref_info3 = first(exp_refs_by_rev(scm, exp3))\n\n    downstream_exp = git_downstream.tmp_dir.dvc.experiments\n    git_downstream.tmp_dir.scm.fetch_refspecs(str(tmp_dir), [\"master:master\"])\n    downstream_exp.pull(git_downstream.remote, [], rev=[baseline, scm.get_rev()])\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info1)) == exp1\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info2)) == exp2\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info3)) == exp3\n\n\ndef test_pull_diverged(tmp_dir, scm, dvc, git_downstream, exp_stage):\n    git_downstream.tmp_dir.scm_gen(\"foo\", \"foo\", commit=\"init\")\n    remote_rev = git_downstream.tmp_dir.scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp = first(results)\n    ref_info = first(exp_refs_by_rev(scm, exp))\n\n    git_downstream.tmp_dir.scm.set_ref(str(ref_info), remote_rev)\n\n    downstream_exp = git_downstream.tmp_dir.dvc.experiments\n    assert downstream_exp.pull(git_downstream.remote, ref_info.name) == []\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info)) == remote_rev\n\n    downstream_exp.pull(git_downstream.remote, ref_info.name, force=True)\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info)) == exp\n\n\ndef test_pull_ambiguous_name(tmp_dir, scm, dvc, git_downstream, exp_stage):\n    from dvc.exceptions import InvalidArgumentError\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], name=\"foo\")\n    exp_a = first(results)\n    ref_info_a = 
first(exp_refs_by_rev(scm, exp_a))\n\n    tmp_dir.scm_gen(\"new\", \"new\", commit=\"new\")\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"], name=\"foo\")\n    exp_b = first(results)\n    ref_info_b = first(exp_refs_by_rev(scm, exp_b))\n\n    remote = git_downstream.remote\n    downstream_exp = git_downstream.tmp_dir.dvc.experiments\n    with pytest.raises(InvalidArgumentError):\n        downstream_exp.pull(remote, [\"foo\"])\n\n    downstream_exp.pull(remote, [str(ref_info_b)])\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info_b)) == exp_b\n\n    with git_downstream.tmp_dir.scm.detach_head(ref_info_a.baseline_sha):\n        downstream_exp.pull(remote, [\"foo\"])\n    assert git_downstream.tmp_dir.scm.get_ref(str(ref_info_a)) == exp_a\n\n\ndef test_auth_error_list(tmp_dir, scm, dvc, http_auth_patch):\n    from dvc.scm import GitAuthError\n\n    with pytest.raises(\n        GitAuthError,\n        match=f\"Authentication failed for: '{http_auth_patch}'\",\n    ):\n        dvc.experiments.ls(git_remote=http_auth_patch)\n\n\ndef test_auth_error_pull(tmp_dir, scm, dvc, http_auth_patch):\n    from dvc.scm import GitAuthError\n\n    with pytest.raises(\n        GitAuthError,\n        match=f\"Authentication failed for: '{http_auth_patch}'\",\n    ):\n        dvc.experiments.pull(http_auth_patch, [\"foo\"])\n\n\ndef test_auth_error_push(tmp_dir, scm, dvc, exp_stage, http_auth_patch):\n    from dvc.scm import GitAuthError\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp = first(results)\n    ref_info = first(exp_refs_by_rev(scm, exp))\n\n    with pytest.raises(\n        GitAuthError,\n        match=f\"Authentication failed for: '{http_auth_patch}'\",\n    ):\n        dvc.experiments.push(http_auth_patch, [ref_info.name])\n\n\n@pytest.mark.parametrize(\"use_ref\", [True, False])\ndef test_get(tmp_dir, scm, dvc, exp_stage, erepo_dir, use_ref):\n    from dvc.repo import Repo\n\n    results = 
dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp_rev = first(results)\n    exp_ref = first(exp_refs_by_rev(scm, exp_rev))\n\n    with erepo_dir.chdir():\n        Repo.get(str(tmp_dir), \"params.yaml\", rev=exp_ref.name if use_ref else exp_rev)\n        assert (erepo_dir / \"params.yaml\").read_text().strip() == \"foo: 2\"\n\n\ndef test_push_pull_invalid_workspace(\n    tmp_dir, scm, dvc, git_upstream, exp_stage, local_remote, caplog\n):\n    dvc.experiments.run()\n\n    with open(\"dvc.yaml\", mode=\"a\") as f:\n        f.write(\"\\ninvalid\")\n\n    with caplog.at_level(logging.WARNING, logger=\"dvc\"):\n        dvc.experiments.push(git_upstream.remote, push_cache=True)\n        dvc.experiments.pull(git_upstream.remote, pull_cache=True)\n        assert \"failed to collect\" not in caplog.text\n\n\n@pytest.mark.parametrize(\n    \"auto_push, expected_key\", [(True, \"up_to_date\"), (False, \"success\")]\n)\ndef test_auto_push_on_run(\n    tmp_dir, scm, dvc, git_upstream, local_remote, exp_stage, auto_push, expected_key\n):\n    remote = git_upstream.remote\n\n    with dvc.config.edit() as conf:\n        conf[\"exp\"][\"auto_push\"] = auto_push\n        conf[\"exp\"][\"git_remote\"] = remote\n\n    exp_name = \"foo\"\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], name=exp_name)\n\n    assert first(dvc.experiments.push(name=exp_name, git_remote=remote)) == expected_key\n\n\n@pytest.mark.parametrize(\n    \"auto_push, expected_key\", [(True, \"up_to_date\"), (False, \"success\")]\n)\ndef test_auto_push_on_save(\n    tmp_dir, scm, dvc, git_upstream, local_remote, exp_stage, auto_push, expected_key\n):\n    remote = git_upstream.remote\n    exp_name = \"foo\"\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], name=exp_name)\n\n    with dvc.config.edit() as conf:\n        conf[\"exp\"][\"auto_push\"] = auto_push\n        conf[\"exp\"][\"git_remote\"] = remote\n\n    dvc.experiments.save(name=exp_name, 
force=True)\n\n    assert first(dvc.experiments.push(name=exp_name, git_remote=remote)) == expected_key\n\n\ndef test_auto_push_misconfigured(\n    tmp_dir, scm, dvc, git_upstream, local_remote, exp_stage, caplog\n):\n    with dvc.config.edit() as conf:\n        conf[\"exp\"][\"auto_push\"] = True\n        conf[\"exp\"][\"git_remote\"] = \"notfound\"\n\n    exp_name = \"foo\"\n    with caplog.at_level(logging.WARNING, logger=\"dvc\"):\n        dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], name=exp_name)\n        assert \"Failed to validate remotes\" in caplog.text\n\n\ndef test_auto_push_tmp_dir(tmp_dir, scm, dvc, git_upstream, local_remote, exp_stage):\n    remote = git_upstream.remote\n\n    with dvc.config.edit() as conf:\n        conf[\"exp\"][\"auto_push\"] = True\n        conf[\"exp\"][\"git_remote\"] = remote\n\n    exp_name = \"foo\"\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=2\"], name=exp_name, tmp_dir=True\n    )\n\n    assert first(dvc.experiments.push(name=exp_name, git_remote=remote)) == \"up_to_date\"\n"
  },
  {
    "path": "tests/func/experiments/test_remove.py",
    "content": "import pytest\nfrom funcy import first\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.repo.experiments.exceptions import UnresolvedExpNamesError\nfrom dvc.repo.experiments.utils import exp_refs_by_rev\n\n\ndef test_remove_experiments_by_ref(tmp_dir, scm, dvc, exp_stage, caplog):\n    queue_length = 3\n    ref_info_list = []\n    ref_name_list = []\n\n    for i in range(queue_length):\n        results = dvc.experiments.run(exp_stage.addressing, params=[f\"foo={i}\"])\n        ref_info = first(exp_refs_by_rev(scm, first(results)))\n        ref_info_list.append(ref_info)\n        ref_name_list.append(str(ref_info))\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.remove([*ref_name_list[:2], \"non-exist\"])\n    assert scm.get_ref(ref_name_list[0]) is not None\n    assert scm.get_ref(ref_name_list[1]) is not None\n    assert scm.get_ref(ref_name_list[2]) is not None\n\n    assert set(dvc.experiments.remove(ref_name_list[:2])) == set(ref_name_list[:2])\n    assert scm.get_ref(ref_name_list[0]) is None\n    assert scm.get_ref(ref_name_list[1]) is None\n    assert scm.get_ref(ref_name_list[2]) is not None\n\n\ndef test_remove_all_queued_experiments(tmp_dir, scm, dvc, exp_stage):\n    queue_length = 3\n    for i in range(queue_length):\n        dvc.experiments.run(exp_stage.addressing, params=[f\"foo={i}\"], queue=True)\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[f\"foo={queue_length}\"])\n    ref_info = first(exp_refs_by_rev(scm, first(results)))\n\n    assert len(dvc.experiments.stash_revs) == queue_length\n    assert len(dvc.experiments.remove(queue=True)) == queue_length\n    assert len(dvc.experiments.stash_revs) == 0\n    assert scm.get_ref(str(ref_info)) is not None\n\n\ndef test_remove_all_experiments_queued_and_completed(tmp_dir, scm, dvc, exp_stage):\n    queue_length = 3\n    for i in range(queue_length):\n        dvc.experiments.run(\n            exp_stage.addressing, 
params=[f\"foo={i}\"], name=f\"exp{i}\", queue=True\n        )\n\n    results = dvc.experiments.run(\n        exp_stage.addressing, params=[f\"foo={queue_length}\"], name=f\"exp{queue_length}\"\n    )\n    ref_info = first(exp_refs_by_rev(scm, first(results)))\n\n    removed = sorted(dvc.experiments.remove(all_commits=True, queue=True))\n\n    assert len(removed) == queue_length + 1\n    assert removed == [f\"exp{i}\" for i in range(queue_length)] + [ref_info.name]\n    assert len(dvc.experiments.stash_revs) == 0\n    assert scm.get_ref(str(ref_info)) is None\n\n\ndef test_remove_special_queued_experiments(tmp_dir, scm, dvc, exp_stage):\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=1\"], queue=True, name=\"queue1\"\n    )\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=2\"], queue=True, name=\"queue2\"\n    )\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=3\"], queue=True, name=\"queue3\"\n    )\n    queue_revs = {\n        entry.name: entry.stash_rev\n        for entry in dvc.experiments.celery_queue.iter_queued()\n    }\n    assert len(queue_revs) == 3\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=4\"])\n    ref_info1 = first(exp_refs_by_rev(scm, first(results)))\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=5\"])\n    ref_info2 = first(exp_refs_by_rev(scm, first(results)))\n\n    assert scm.get_ref(str(ref_info1)) is not None\n    assert scm.get_ref(str(ref_info2)) is not None\n\n    rev2 = queue_revs[\"queue2\"]\n    assert set(dvc.experiments.remove([\"queue1\", rev2[:5], str(ref_info1)])) == {\n        \"queue1\",\n        rev2[:5],\n        str(ref_info1),\n    }\n    assert len(list(dvc.experiments.celery_queue.iter_queued())) == 1\n    assert scm.get_ref(str(ref_info1)) is None\n    assert scm.get_ref(str(ref_info2)) is not None\n\n\ndef test_remove_all(tmp_dir, scm, dvc, exp_stage):\n    results = 
dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    ref_info1 = first(exp_refs_by_rev(scm, first(results)))\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"], queue=True)\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"copy.py\", \"params.yaml\", \"metrics.yaml\"])\n    scm.commit(\"update baseline\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    ref_info2 = first(exp_refs_by_rev(scm, first(results)))\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=4\"], queue=True)\n\n    assert set(dvc.experiments.remove(all_commits=True)) == {\n        ref_info1.name,\n        ref_info2.name,\n    }\n    assert len(dvc.experiments.stash_revs) == 2\n    assert scm.get_ref(str(ref_info2)) is None\n    assert scm.get_ref(str(ref_info1)) is None\n\n\n@pytest.mark.parametrize(\"use_url\", [True, False])\ndef test_remove_remote(tmp_dir, scm, dvc, exp_stage, git_upstream, use_url):\n    remote = git_upstream.url if use_url else git_upstream.remote\n\n    ref_info_list = []\n    exp_list = []\n    for i in range(3):\n        results = dvc.experiments.run(exp_stage.addressing, params=[f\"foo={i}\"])\n        exp = first(results)\n        exp_list.append(exp)\n        ref_info = first(exp_refs_by_rev(scm, exp))\n        ref_info_list.append(ref_info)\n        dvc.experiments.push(remote, [ref_info.name])\n        assert git_upstream.tmp_dir.scm.get_ref(str(ref_info)) == exp\n\n    dvc.experiments.remove(\n        git_remote=remote,\n        exp_names=[str(ref_info_list[0]), ref_info_list[1].name],\n    )\n\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info_list[0])) is None\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info_list[1])) is None\n    assert git_upstream.tmp_dir.scm.get_ref(str(ref_info_list[2])) == exp_list[2]\n\n    with pytest.raises(\n        UnresolvedExpNamesError, match=f\"Experiment 'foo' does not exist in '{remote}'\"\n    ):\n        dvc.experiments.remove(git_remote=remote, 
exp_names=[\"foo\"])\n\n\ndef test_remove_experiments_by_rev(tmp_dir, scm, dvc, exp_stage):\n    baseline = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    baseline_exp_ref = first(exp_refs_by_rev(scm, first(results)))\n\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=2\"], queue=True, name=\"queue2\"\n    )\n    scm.commit(\"new_baseline\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    ref_info = first(exp_refs_by_rev(scm, first(results)))\n    new_exp_ref = str(ref_info)\n\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=4\"], queue=True, name=\"queue4\"\n    )\n\n    assert dvc.experiments.remove(rev=baseline) == [baseline_exp_ref.name]\n    queue_revs = {\n        entry.name: entry.stash_rev\n        for entry in dvc.experiments.celery_queue.iter_queued()\n    }\n    assert scm.get_ref(str(baseline_exp_ref)) is None\n    assert \"queue2\" in queue_revs\n    assert scm.get_ref(new_exp_ref) is not None\n    assert \"queue4\" in queue_revs\n\n\ndef test_remove_multi_rev(tmp_dir, scm, dvc, exp_stage):\n    baseline = scm.get_rev()\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"])\n    baseline_exp_ref = first(exp_refs_by_rev(scm, first(results)))\n\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=2\"], queue=True, name=\"queue2\"\n    )\n    scm.commit(\"new_baseline\")\n\n    results = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    new_exp_ref = first(exp_refs_by_rev(scm, first(results)))\n\n    assert set(dvc.experiments.remove(rev=[baseline, scm.get_rev()])) == {\n        baseline_exp_ref.name,\n        new_exp_ref.name,\n    }\n\n    assert scm.get_ref(str(baseline_exp_ref)) is None\n    assert scm.get_ref(str(new_exp_ref)) is None\n\n\n@pytest.mark.parametrize(\n    \"keep, expected_removed\",\n    [\n        ([\"exp1\"], [\"exp2\", \"exp3\"]),\n        
([\"exp1\", \"exp2\"], [\"exp3\"]),\n        ([\"exp1\", \"exp2\", \"exp3\"], []),\n        ([], []),  # remove does nothing if no experiments are specified\n    ],\n)\ndef test_keep_selected_by_name(tmp_dir, scm, dvc, exp_stage, keep, expected_removed):\n    # Setup: Run experiments\n    refs = {}\n    for i in range(1, len(keep) + len(expected_removed) + 1):\n        results = dvc.experiments.run(\n            exp_stage.addressing, params=[f\"foo={i}\"], name=f\"exp{i}\"\n        )\n        refs[f\"exp{i}\"] = first(exp_refs_by_rev(scm, first(results)))\n        assert scm.get_ref(str(refs[f\"exp{i}\"])) is not None\n\n    removed = dvc.experiments.remove(exp_names=keep, keep=True)\n    assert sorted(removed) == sorted(expected_removed)\n\n    for exp in expected_removed:\n        assert scm.get_ref(str(refs[exp])) is None\n\n    for exp in keep:\n        assert scm.get_ref(str(refs[exp])) is not None\n\n\ndef test_keep_selected_by_nonexistent_name(tmp_dir, scm, dvc, exp_stage):\n    # non existent name should raise an error\n    with pytest.raises(UnresolvedExpNamesError):\n        dvc.experiments.remove(exp_names=[\"nonexistent\"], keep=True)\n\n\n@pytest.mark.parametrize(\n    \"num_exps, rev, num, expected_removed\",\n    [\n        (2, \"exp1\", 1, [\"exp2\"]),\n        (3, \"exp3\", 1, [\"exp1\", \"exp2\"]),\n        (3, \"exp3\", 2, [\"exp1\"]),\n        (3, \"exp3\", 3, []),\n        (3, \"exp2\", 2, [\"exp3\"]),\n        (4, \"exp2\", 2, [\"exp3\", \"exp4\"]),\n        (4, \"exp4\", 2, [\"exp1\", \"exp2\"]),\n        (1, None, 1, []),  # remove does nothing if no experiments are specified\n    ],\n)\ndef test_keep_selected_by_rev(\n    tmp_dir, scm, dvc, exp_stage, num_exps, rev, num, expected_removed\n):\n    refs = {}\n    revs = {}\n    # Setup: Run experiments and commit\n    for i in range(1, num_exps + 1):\n        scm.commit(f\"commit{i}\")\n        results = dvc.experiments.run(\n            exp_stage.addressing, params=[f\"foo={i}\"], 
name=f\"exp{i}\"\n        )\n        refs[f\"exp{i}\"] = first(exp_refs_by_rev(scm, first(results)))\n        revs[f\"exp{i}\"] = scm.get_rev()\n        assert scm.get_ref(str(refs[f\"exp{i}\"])) is not None\n\n    # Keep the experiment from the new revision\n    removed = dvc.experiments.remove(rev=revs.get(rev), num=num, keep=True)\n    assert sorted(removed) == sorted(expected_removed)\n\n    # Check remaining experiments\n    for exp in expected_removed:\n        assert scm.get_ref(str(refs[exp])) is None\n\n    for exp, ref in refs.items():\n        if exp not in expected_removed:\n            assert scm.get_ref(str(ref)) is not None\n\n\ndef test_remove_with_queue_and_keep(tmp_dir, scm, dvc, exp_stage):\n    # This should raise an exception, until decided otherwise\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.remove(queue=True, keep=True)\n"
  },
  {
    "path": "tests/func/experiments/test_rename.py",
    "content": "import pytest\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.repo.experiments.exceptions import (\n    ExperimentExistsError,\n    UnresolvedExpNamesError,\n)\nfrom dvc.repo.experiments.utils import exp_refs_by_names\n\n\ndef test_rename_experiment_by_name(scm, dvc, exp_stage):\n    dvc.experiments.run(exp_stage.addressing, name=\"test-name\", params=[\"foo=1\"])\n    old_ref = exp_refs_by_names(scm, {\"test-name\"})\n    dvc.experiments.rename(\"new-name\", \"test-name\")\n    new_ref = exp_refs_by_names(scm, {\"new-name\"})\n    assert scm.get_ref(str(old_ref[\"test-name\"][0])) is None\n    assert scm.get_ref(str(new_ref[\"new-name\"][0])) is not None\n    with pytest.raises(UnresolvedExpNamesError):\n        dvc.experiments.rename(\"new-name\", \"foo\")\n\n\ndef test_same_name(dvc, exp_stage):\n    dvc.experiments.run(exp_stage.addressing, name=\"same-name\", params=[\"foo=1\"])\n    assert dvc.experiments.rename(\"same-name\", \"same-name\") is None\n\n\ndef test_existing_name(dvc, exp_stage):\n    dvc.experiments.run(exp_stage.addressing, name=\"first-name\", params=[\"foo=1\"])\n    dvc.experiments.run(exp_stage.addressing, name=\"second-name\", params=[\"foo=2\"])\n\n    with pytest.raises(ExperimentExistsError):\n        dvc.experiments.rename(\"second-name\", \"first-name\")\n\n    dvc.experiments.rename(\"second-name\", \"first-name\", force=True)\n\n\ndef test_invalid_name(dvc, exp_stage):\n    dvc.experiments.run(exp_stage.addressing, name=\"test-name\", params=[\"foo=1\"])\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.rename(\"invalid*name\", \"test-name\")\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.rename(\"invalid/name\", \"test-name\")\n"
  },
  {
    "path": "tests/func/experiments/test_save.py",
    "content": "import pytest\nfrom funcy import first\n\nfrom dvc.repo.experiments.exceptions import ExperimentExistsError, InvalidArgumentError\nfrom dvc.repo.experiments.utils import exp_refs_by_rev\nfrom dvc.scm import resolve_rev\n\n\ndef setup_stage(tmp_dir, dvc, scm):\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\")\n    dvc.run(name=\"echo-foo\", outs=[\"bar\"], cmd=\"echo foo > bar\")\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \".gitignore\", \"params.yaml\"])\n    scm.commit(\"init\")\n\n\ndef test_exp_save_unchanged(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n    dvc.experiments.save()\n\n\n@pytest.mark.parametrize(\"name\", [None, \"test\"])\ndef test_exp_save(tmp_dir, dvc, scm, name):\n    setup_stage(tmp_dir, dvc, scm)\n    baseline = scm.get_rev()\n\n    exp = dvc.experiments.save(name=name)\n    ref_info = first(exp_refs_by_rev(scm, exp))\n    assert ref_info\n    assert ref_info.baseline_sha == baseline\n\n    exp_name = name if name else ref_info.name\n    assert dvc.experiments.get_exact_name([exp])[exp] == exp_name\n    assert resolve_rev(scm, exp_name) == exp\n\n\ndef test_exp_save_overwrite_experiment(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n    name = \"dummy\"\n    dvc.experiments.save(name=name)\n\n    tmp_dir.gen(\"params.yaml\", \"foo: 2\")\n    with pytest.raises(ExperimentExistsError):\n        dvc.experiments.save(name=name)\n\n    dvc.experiments.save(name=name, force=True)\n\n\n@pytest.mark.parametrize(\n    \"name\",\n    [\n        \"invalid/name\",\n        \"invalid..name\",\n        \"invalid~name\",\n        \"invalid?name\",\n        \"invalidname.\",\n    ],\n)\ndef test_exp_save_invalid_name(tmp_dir, dvc, scm, name):\n    setup_stage(tmp_dir, dvc, scm)\n    with pytest.raises(InvalidArgumentError):\n        dvc.experiments.save(name=name, force=True)\n\n\ndef test_exp_save_after_commit(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n    baseline = scm.get_rev()\n    
dvc.experiments.save(name=\"exp-1\", force=True)\n\n    tmp_dir.scm_gen({\"new_file\": \"new_file\"}, commit=\"new baseline\")\n    baseline_new = scm.get_rev()\n    dvc.experiments.save(name=\"exp-2\", force=True)\n\n    all_exps = dvc.experiments.ls(all_commits=True)\n    assert all_exps[baseline][0][0] == \"exp-1\"\n    assert all_exps[baseline_new][0][0] == \"exp-2\"\n\n\ndef test_exp_save_with_staged_changes(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n    tmp_dir.gen({\"deleted\": \"deleted\", \"modified\": \"modified\"})\n    scm.add_commit([\"deleted\", \"modified\"], \"init\")\n\n    (tmp_dir / \"deleted\").unlink()\n    tmp_dir.gen({\"new_file\": \"new_file\"})\n    (tmp_dir / \"modified\").write_text(\"foo\")\n    scm.add([\"deleted\", \"new_file\", \"modified\"])\n\n    exp_rev = dvc.experiments.save(name=\"exp\")\n    scm.checkout(exp_rev, force=True)\n    assert not (tmp_dir / \"deleted\").exists()\n    assert (tmp_dir / \"new_file\").exists()\n    assert (tmp_dir / \"modified\").read_text() == \"foo\"\n\n\ndef test_exp_save_include_untracked(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n\n    new_file = tmp_dir / \"new_file\"\n    new_file.write_text(\"new_file\")\n    dvc.experiments.save(name=\"exp\", include_untracked=[\"new_file\"])\n\n    _, _, unstaged = scm.status()\n    assert \"new_file\" in unstaged\n    assert new_file.read_text() == \"new_file\"\n\n\ndef test_exp_save_include_untracked_warning(tmp_dir, dvc, scm, mocker):\n    \"\"\"Regression test for https://github.com/treeverse/dvc/issues/9061\"\"\"\n    setup_stage(tmp_dir, dvc, scm)\n\n    new_dir = tmp_dir / \"new_dir\"\n    new_dir.mkdir()\n    (new_dir / \"foo\").write_text(\"foo\")\n    (new_dir / \"bar\").write_text(\"bar\")\n\n    logger = mocker.patch(\"dvc.repo.experiments.executor.base.logger\")\n\n    dvc.experiments.save(name=\"exp\", include_untracked=[\"new_dir\"])\n    assert not logger.warning.called\n\n\ndef 
test_untracked_top_level_files_are_included_in_exp(tmp_dir, scm, dvc):\n    (tmp_dir / \"dvc.yaml\").dump(\n        {\"metrics\": [\"metrics.json\"], \"params\": [\"params.yaml\"], \"plots\": [\"plots.csv\"]}\n    )\n    stage = dvc.stage.add(\n        cmd=\"touch metrics.json && touch params.yaml && touch plots.csv\",\n        name=\"top-level\",\n    )\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n    dvc.reproduce(stage.addressing)\n    exp = dvc.experiments.save()\n    fs = scm.get_fs(exp)\n    for file in [\"metrics.json\", \"params.yaml\", \"plots.csv\", \"dvc.lock\"]:\n        assert fs.exists(file)\n\n\ndef test_untracked_dvclock_is_included_in_exp(tmp_dir, scm, dvc):\n    stage = dvc.stage.add(cmd=\"echo foo\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n    dvc.reproduce(stage.addressing)\n\n    # dvc.reproduce automatically stages `dvc.lock`\n    # force it to be untracked\n    scm.reset()\n\n    exp = dvc.experiments.save()\n    fs = scm.get_fs(exp)\n    assert fs.exists(\"dvc.lock\")\n\n\ndef test_exp_save_include_untracked_force(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n\n    new_file = tmp_dir / \"new_file\"\n    new_file.write_text(\"new_file\")\n    dvc.scm.ignore(new_file)\n    exp = dvc.experiments.save(include_untracked=[\"new_file\"])\n\n    fs = scm.get_fs(exp)\n    assert fs.exists(\"new_file\")\n\n\ndef test_exp_save_custom_message(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n\n    exp = dvc.experiments.save(message=\"custom commit message\")\n    assert scm.resolve_commit(exp).message == \"custom commit message\"\n\n\ndef test_exp_save_target(tmp_dir, dvc, scm):\n    setup_stage(tmp_dir, dvc, scm)\n    orig_dvclock = (tmp_dir / \"dvc.lock\").read_text()\n    (tmp_dir / \"bar\").write_text(\"modified\")\n\n    tmp_dir.dvc_gen({\"file\": \"orig\"}, commit=\"add files\")\n    orig_dvcfile = (tmp_dir / \"file.dvc\").read_text()\n    (tmp_dir / 
\"file\").write_text(\"modified\")\n\n    dvc.experiments.save([\"file\"])\n    assert (tmp_dir / \"bar\").read_text() == \"modified\"\n    assert (tmp_dir / \"dvc.lock\").read_text() == orig_dvclock\n    assert (tmp_dir / \"file\").read_text() == \"modified\"\n    assert (tmp_dir / \"file.dvc\").read_text() != orig_dvcfile\n"
  },
  {
    "path": "tests/func/experiments/test_set_params.py",
    "content": "import pytest\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom tests.func.utils.test_hydra import hydra_setup\n\n\n@pytest.mark.parametrize(\n    \"changes, expected\",\n    [\n        ([\"foo=baz\"], \"foo: baz\\ngoo:\\n  bag: 3.0\\nlorem: false\"),\n        ([\"params.yaml:foo=baz\"], \"foo: baz\\ngoo:\\n  bag: 3.0\\nlorem: false\"),\n    ],\n)\ndef test_modify_params(params_repo, dvc, changes, expected):\n    dvc.experiments.run(params=changes)\n    with open(\"params.yaml\") as fobj:\n        assert fobj.read().strip() == expected\n\n\n@pytest.mark.parametrize(\"hydra_enabled\", [True, False])\n@pytest.mark.parametrize(\n    \"config_dir,config_name\",\n    [\n        (None, None),\n        (None, \"bar\"),\n        (\"conf\", \"bar\"),\n    ],\n)\n@pytest.mark.parametrize(\"no_hydra\", [True, False])\ndef test_hydra_compose_and_dump(\n    tmp_dir, params_repo, dvc, hydra_enabled, config_dir, config_name, no_hydra\n):\n    hydra_setup(\n        tmp_dir,\n        config_dir=config_dir or \"conf\",\n        config_name=config_name or \"config\",\n    )\n\n    dvc.experiments.run()\n    assert (tmp_dir / \"params.yaml\").parse() == {\n        \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n        \"goo\": {\"bag\": 3.0},\n        \"lorem\": False,\n    }\n\n    with dvc.config.edit() as conf:\n        if hydra_enabled:\n            conf[\"hydra\"][\"enabled\"] = True\n        if config_dir is not None:\n            conf[\"hydra\"][\"config_dir\"] = config_dir\n        if config_name is not None:\n            conf[\"hydra\"][\"config_name\"] = config_name\n\n    dvc.experiments.run(no_hydra=no_hydra)\n\n    if hydra_enabled and not no_hydra:\n        assert (tmp_dir / \"params.yaml\").parse() == {\n            \"db\": {\"driver\": \"mysql\", \"user\": \"omry\", \"pass\": \"secret\"},\n        }\n\n        dvc.experiments.run(params=[\"db=postgresql\"], no_hydra=no_hydra)\n        assert (tmp_dir / \"params.yaml\").parse() == {\n            \"db\": 
{\n                \"driver\": \"postgresql\",\n                \"user\": \"foo\",\n                \"pass\": \"bar\",\n                \"timeout\": 10,\n            }\n        }\n    else:\n        assert (tmp_dir / \"params.yaml\").parse() == {\n            \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n            \"goo\": {\"bag\": 3.0},\n            \"lorem\": False,\n        }\n\n\n@pytest.mark.parametrize(\n    \"hydra_enabled,overrides,expected\",\n    [\n        (\n            True,\n            [\"db=mysql,postgresql\"],\n            [\n                {\"params.yaml\": [\"db=mysql\"]},\n                {\"params.yaml\": [\"db=postgresql\"]},\n            ],\n        ),\n        (\n            False,\n            [\"foo=bar,baz\"],\n            [{\"params.yaml\": [\"foo=bar\"]}, {\"params.yaml\": [\"foo=baz\"]}],\n        ),\n        (\n            False,\n            [],\n            [{}],\n        ),\n    ],\n)\ndef test_hydra_sweep(\n    tmp_dir, params_repo, dvc, mocker, hydra_enabled, overrides, expected\n):\n    patched = mocker.patch.object(dvc.experiments, \"queue_one\")\n\n    if hydra_enabled:\n        hydra_setup(tmp_dir, config_dir=\"conf\", config_name=\"config\")\n        with dvc.config.edit() as conf:\n            conf[\"hydra\"][\"enabled\"] = True\n\n    dvc.experiments.run(params=overrides, queue=True)\n\n    assert patched.call_count == len(expected)\n    for e in expected:\n        patched.assert_any_call(\n            mocker.ANY,\n            params=e,\n            targets=None,\n            copy_paths=None,\n            message=None,\n            no_hydra=False,\n        )\n\n\ndef test_hydra_sweep_requires_queue(params_repo, dvc):\n    with pytest.raises(\n        InvalidArgumentError,\n        match=\"Sweep overrides can't be used without `--queue`\",\n    ):\n        dvc.experiments.run(params=[\"db=mysql,postgresql\"])\n\n\ndef test_hydra_sweep_prefix_name(tmp_dir, params_repo, dvc):\n    prefix = \"foo\"\n    db_values = [\"mysql\", 
\"postgresql\"]\n    param = \"+db=\" + \",\".join(db_values)\n    dvc.experiments.run(params=[param], queue=True, name=prefix)\n    expected_names = [f\"{prefix}-{i + 1}\" for i, _ in enumerate(db_values)]\n    exp_names = [entry.name for entry in dvc.experiments.celery_queue.iter_queued()]\n    for name, expected in zip(exp_names, expected_names):\n        assert name == expected\n\n\ndef test_mixing_no_hydra_and_params_flags(tmp_dir, params_repo, dvc):\n    # Passing no_hydra should not prevent user from\n    # using --set-param on unmodified params.yaml\n    hydra_setup(\n        tmp_dir,\n        config_dir=\"conf\",\n        config_name=\"config\",\n    )\n\n    with dvc.config.edit() as conf:\n        conf[\"hydra\"][\"enabled\"] = True\n        conf[\"hydra\"][\"config_dir\"] = \"conf\"\n        conf[\"hydra\"][\"config_name\"] = \"config\"\n\n    dvc.experiments.run(no_hydra=True, params=[\"goo.bag=10.0\"])\n\n    assert (tmp_dir / \"params.yaml\").parse() == {\n        \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n        \"goo\": {\"bag\": 10.0},\n        \"lorem\": False,\n    }\n\n\n@pytest.mark.parametrize(\n    \"hydra_enabled,overrides,expected\",\n    [\n        (\n            True,\n            [\"db=mysql,postgresql\"],\n            [\n                {\"params.yaml\": [\"db=mysql\"]},\n                {\"params.yaml\": [\"db=postgresql\"]},\n            ],\n        ),\n        (\n            False,\n            [\"foo=bar,baz\"],\n            [{\"params.yaml\": [\"foo=bar\"]}, {\"params.yaml\": [\"foo=baz\"]}],\n        ),\n        (\n            False,\n            [],\n            [{}],\n        ),\n    ],\n)\n@pytest.mark.parametrize(\"no_hydra\", [True, False])\ndef test_mixing_no_hydra_and_sweeps(\n    tmp_dir, params_repo, dvc, mocker, hydra_enabled, overrides, expected, no_hydra\n):\n    # Passing no_hydra should not prevent user from\n    # queuing sweeps with --set-param and --queue\n    patched = mocker.patch.object(dvc.experiments, 
\"queue_one\")\n\n    if hydra_enabled:\n        hydra_setup(tmp_dir, config_dir=\"conf\", config_name=\"config\")\n        with dvc.config.edit() as conf:\n            conf[\"hydra\"][\"enabled\"] = True\n\n    dvc.experiments.run(params=overrides, queue=True, no_hydra=no_hydra)\n\n    assert patched.call_count == len(expected)\n    for e in expected:\n        patched.assert_any_call(\n            mocker.ANY,\n            params=e,\n            targets=None,\n            copy_paths=None,\n            message=None,\n            no_hydra=no_hydra,\n        )\n"
  },
  {
    "path": "tests/func/experiments/test_show.py",
    "content": "import logging\nimport os\nfrom datetime import datetime\nfrom unittest.mock import ANY\n\nimport pytest\nfrom funcy import first\nfrom scmrepo.exceptions import SCMError\n\nfrom dvc.cli import main\nfrom dvc.repo.experiments.executor.base import BaseExecutor, ExecutorInfo, TaskStatus\nfrom dvc.repo.experiments.refs import CELERY_STASH\nfrom dvc.repo.experiments.utils import EXEC_PID_DIR, EXEC_TMP_DIR, exp_refs_by_rev\nfrom dvc.utils import relpath\n\nLOCK_CONTENTS = {\n    \"read\": {\n        \"data/MNIST\": [{\"pid\": 54062, \"cmd\": \"dvc exp run\"}],\n    },\n    \"write\": {\n        \"data/MNIST\": {\"pid\": 54062, \"cmd\": \"dvc exp run\"},\n    },\n}\n\n\ndef make_executor_info(**kwargs):\n    # set default values for required info fields\n    for key in (\n        \"git_url\",\n        \"baseline_rev\",\n        \"location\",\n        \"root_dir\",\n        \"dvc_dir\",\n    ):\n        if key not in kwargs:\n            kwargs[key] = \"\"\n    return ExecutorInfo(**kwargs)\n\n\ndef make_executor(local=None, **kwargs):\n    if local:\n        local_executor = {\n            \"root\": ANY,\n            \"log\": ANY,\n            \"pid\": ANY,\n            \"returncode\": ANY,\n            \"task_id\": ANY,\n        }\n        local_executor.update(local)\n    else:\n        local_executor = ANY\n    data = {\"state\": ANY, \"local\": local_executor, \"name\": ANY}\n    data.update(kwargs)\n    return data\n\n\ndef make_data(params=None, **kwargs):\n    params = {\"data\": params or {\"foo\": 1}}\n    data = {\n        \"rev\": ANY,\n        \"deps\": {\"copy.py\": {\"hash\": ANY, \"size\": ANY, \"nfiles\": None}},\n        \"metrics\": {\"metrics.yaml\": params},\n        \"outs\": {},\n        \"params\": {\"params.yaml\": params},\n        \"timestamp\": ANY,\n        \"meta\": ANY,\n    }\n    data.update(kwargs)\n    return data\n\n\n@pytest.mark.vscode\ndef test_show_branch_and_tag_name(tmp_dir, scm, dvc, exp_stage):\n    with 
tmp_dir.branch(\"new/branch\", new=True):\n        tmp_dir.scm_gen(\"branch\", \"branch\", \"commit\")\n\n    result = dvc.experiments.show(all_branches=True)\n    expected = [None, \"master\", \"new/branch\"]\n    assert [exp.name for exp in result] == expected\n\n    scm.tag(\"new/tag\")\n    tag_rev = scm.get_rev()\n    with scm.detach_head(tag_rev):\n        result = dvc.experiments.show(all_tags=True)\n    expected = [None, \"new/tag\"]\n    assert [exp.name for exp in result] == expected\n\n\n@pytest.mark.vscode\ndef test_show(tmp_dir, scm, dvc, exp_stage):\n    workspace_result = {\n        \"rev\": \"workspace\",\n        \"name\": None,\n        \"data\": make_data(rev=\"workspace\"),\n        \"error\": None,\n        \"experiments\": None,\n    }\n    branch_result = {\n        \"rev\": scm.get_rev(),\n        \"name\": \"master\",\n        \"data\": make_data(rev=scm.get_rev()),\n        \"error\": None,\n        \"experiments\": None,\n    }\n\n    ret = [d.dumpd() for d in dvc.experiments.show()]\n    assert ret == [workspace_result, branch_result]\n\n    ret = [d.dumpd() for d in dvc.experiments.show(hide_workspace=True)]\n    assert ret == [branch_result]\n\n\n@pytest.mark.vscode\n@pytest.mark.parametrize(\"workspace\", [True, False])\ndef test_show_experiment(tmp_dir, scm, dvc, exp_stage, workspace):\n    baseline_rev = scm.get_rev()\n    timestamp = datetime.fromtimestamp(  # noqa: DTZ006\n        scm.resolve_commit(baseline_rev).commit_time\n    )\n\n    exp_rev = first(\n        dvc.experiments.run(\n            exp_stage.addressing, params=[\"foo=2\"], tmp_dir=not workspace\n        )\n    )\n    results = dvc.experiments.show()\n    assert results[1].dumpd() == {\n        \"rev\": baseline_rev,\n        \"name\": \"master\",\n        \"data\": make_data(rev=baseline_rev, timestamp=timestamp),\n        \"error\": None,\n        \"experiments\": [\n            {\n                \"revs\": [\n                    {\n                        
\"rev\": exp_rev,\n                        \"name\": ANY,\n                        \"data\": make_data(rev=exp_rev, params={\"foo\": 2}),\n                        \"error\": None,\n                        \"experiments\": None,\n                    }\n                ],\n                \"executor\": None,\n                \"name\": ANY,\n            }\n        ],\n    }\n\n\n@pytest.mark.vscode\ndef test_show_queued(tmp_dir, scm, dvc, exp_stage):\n    baseline_rev = scm.get_rev()\n\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=2\"], queue=True, name=\"test_name\"\n    )\n    exp_rev = dvc.experiments.scm.resolve_rev(f\"{CELERY_STASH}@{{0}}\")\n\n    results = dvc.experiments.show()\n    assert results[1].dumpd() == {\n        \"rev\": baseline_rev,\n        \"name\": \"master\",\n        \"data\": make_data(rev=baseline_rev),\n        \"error\": None,\n        \"experiments\": [\n            {\n                \"revs\": [\n                    {\n                        \"rev\": exp_rev,\n                        \"name\": \"test_name\",\n                        \"data\": make_data(rev=exp_rev, params={\"foo\": 2}, metrics=ANY),\n                        \"error\": None,\n                        \"experiments\": None,\n                    }\n                ],\n                \"executor\": make_executor(state=\"queued\"),\n                \"name\": \"test_name\",\n            }\n        ],\n    }\n\n    # test that only queued experiments for the current baseline are returned\n    tmp_dir.gen(\"foo\", \"foo\")\n    scm.add([\"foo\"])\n    scm.commit(\"new commit\")\n    new_rev = scm.get_rev()\n\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"], queue=True)\n    exp_rev = dvc.experiments.scm.resolve_rev(f\"{CELERY_STASH}@{{0}}\")\n\n    results = dvc.experiments.show()\n    assert results[1].dumpd() == {\n        \"rev\": new_rev,\n        \"name\": \"master\",\n        \"data\": make_data(rev=new_rev),\n        \"error\": 
None,\n        \"experiments\": [\n            {\n                \"revs\": [\n                    {\n                        \"rev\": exp_rev,\n                        \"name\": ANY,\n                        \"data\": make_data(rev=exp_rev, params={\"foo\": 3}, metrics=ANY),\n                        \"error\": None,\n                        \"experiments\": None,\n                    }\n                ],\n                \"executor\": make_executor(state=\"queued\"),\n                \"name\": ANY,\n            }\n        ],\n    }\n\n\n@pytest.mark.vscode\ndef test_show_failed_experiment(tmp_dir, scm, dvc, failed_exp_stage, test_queue):\n    baseline_rev = scm.get_rev()\n    dvc.experiments.run(failed_exp_stage.addressing, params=[\"foo=2\"], queue=True)\n    exp_rev = dvc.experiments.scm.resolve_rev(f\"{CELERY_STASH}@{{0}}\")\n    dvc.experiments.run(run_all=True)\n\n    results = dvc.experiments.show()\n    assert results[1].dumpd() == {\n        \"rev\": baseline_rev,\n        \"name\": \"master\",\n        \"data\": make_data(rev=baseline_rev, metrics=ANY),\n        \"error\": None,\n        \"experiments\": [\n            {\n                \"revs\": [\n                    {\n                        \"rev\": exp_rev,\n                        \"name\": ANY,\n                        \"data\": make_data(rev=exp_rev, params={\"foo\": 2}, metrics=ANY),\n                        \"error\": {\"msg\": \"Experiment run failed\", \"type\": ANY},\n                        \"experiments\": None,\n                    }\n                ],\n                \"executor\": make_executor(state=\"failed\", local={\"returncode\": 255}),\n                \"name\": ANY,\n            }\n        ],\n    }\n\n\ndef test_show_filter(tmp_dir, scm, dvc, capsys, copy_script):\n    capsys.readouterr()\n\n    params_file = tmp_dir / \"params.yaml\"\n    params_data = {\n        \"foo\": 1,\n        \"bar\": 1,\n        \"train/foo\": 1,\n        \"train/bar\": 1,\n        \"nested\": 
{\"foo\": 1, \"bar\": 1},\n    }\n    (tmp_dir / params_file).dump(params_data)\n\n    dvc.run(\n        cmd=\"python copy.py params.yaml metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\"],\n    )\n    scm.add(\n        [\n            \"dvc.yaml\",\n            \"dvc.lock\",\n            \"copy.py\",\n            \"params.yaml\",\n            \"metrics.yaml\",\n            \".gitignore\",\n        ]\n    )\n    scm.commit(\"init\")\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--drop=.*foo\"]) == 0\n    cap = capsys.readouterr()\n    for filtered in [\"foo\", \"train/foo\", \"nested.foo\"]:\n        assert f\"params.yaml:{filtered}\" not in cap.out\n        assert f\"metrics.yaml:{filtered}\" not in cap.out\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--drop=.*foo\", \"--keep=.*train\"]) == 0\n    cap = capsys.readouterr()\n    for filtered in [\"foo\", \"nested.foo\"]:\n        assert f\"params.yaml:{filtered}\" not in cap.out\n        assert f\"metrics.yaml:{filtered}\" not in cap.out\n    assert \"params.yaml:train/foo\" in cap.out\n    assert \"metrics.yaml:train/foo\" in cap.out\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--drop=params.yaml:.*foo\"]) == 0\n    cap = capsys.readouterr()\n    for filtered in [\"foo\", \"train/foo\", \"nested.foo\"]:\n        assert f\"params.yaml:{filtered}\" not in cap.out\n        assert f\"metrics.yaml:{filtered}\" in cap.out\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--drop=Created\"]) == 0\n    cap = capsys.readouterr()\n    assert \"Created\" not in cap.out\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--drop=Created|Experiment\"]) == 0\n    cap = capsys.readouterr()\n    assert \"Created\" not in cap.out\n    assert \"Experiment\" not in cap.out\n\n\n@pytest.mark.vscode\ndef test_show_multiple_commits(tmp_dir, scm, dvc, 
exp_stage):\n    init_rev = scm.get_rev()\n    tmp_dir.scm_gen(\"file\", \"file\", \"commit\")\n    next_rev = scm.get_rev()\n\n    dvc.experiments.show(num=-2)\n\n    expected = [\"workspace\", next_rev, init_rev]\n    results = dvc.experiments.show(num=2)\n    assert [exp.rev for exp in results] == expected\n\n    expected = [\"workspace\", *scm.branch_revs(\"master\")]\n    results = dvc.experiments.show(all_commits=True)\n    assert [exp.rev for exp in results] == expected\n\n    results = dvc.experiments.show(num=100)\n    assert [exp.rev for exp in results] == expected\n\n\ndef test_show_sort(tmp_dir, scm, dvc, exp_stage, caplog):\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n\n    with caplog.at_level(logging.ERROR):\n        assert main([\"exp\", \"show\", \"--no-pager\", \"--sort-by=bar\"]) != 0\n        assert \"Unknown sort column\" in caplog.text\n\n    with caplog.at_level(logging.ERROR):\n        assert main([\"exp\", \"show\", \"--no-pager\", \"--sort-by=foo\"]) != 0\n        assert \"Ambiguous sort column\" in caplog.text\n\n    assert main([\"exp\", \"show\", \"--no-pager\", \"--sort-by=params.yaml:foo\"]) == 0\n\n    assert main([\"exp\", \"show\", \"--no-pager\", \"--sort-by=metrics.yaml:foo\"]) == 0\n\n\ndef test_show_sort_metric_sep(tmp_dir, scm, dvc, caplog):\n    metrics_path = tmp_dir / \"metrics:1.json\"\n    metrics_path.write_text('{\"my::metric\": 1, \"other_metric\": 0.5}')\n    metrics_path = tmp_dir / \"metrics:2.json\"\n    metrics_path.write_text('{\"my::metric\": 2}')\n    dvcyaml_path = tmp_dir / \"dvc.yaml\"\n    dvcyaml_path.write_text(\"metrics: ['metrics:1.json', 'metrics:2.json']\")\n    dvc.experiments.save()\n    assert (\n        main([\"exp\", \"show\", \"--no-pager\", \"--sort-by=metrics:1.json:my::metric\"]) == 0\n    )\n    assert main([\"exp\", \"show\", \"--no-pager\", \"--sort-by=:other_metric\"]) == 0\n\n\n@pytest.mark.vscode\n@pytest.mark.parametrize(\n    \"status, pid_exists\",\n    [\n     
   (TaskStatus.RUNNING, True),\n        (TaskStatus.RUNNING, False),\n        (TaskStatus.FAILED, False),\n    ],\n)\ndef test_show_running(\n    tmp_dir, scm, dvc, exp_stage, capsys, caplog, status, pid_exists, mocker\n):\n    from dvc.rwlock import RWLOCK_FILE\n    from dvc_task.proc.process import ProcessInfo\n\n    baseline_rev = scm.get_rev()\n    pid_dir = os.path.join(dvc.tmp_dir, EXEC_TMP_DIR, EXEC_PID_DIR)\n    lock_file = relpath(os.path.join(dvc.tmp_dir, RWLOCK_FILE), str(tmp_dir))\n    info = make_executor_info(\n        location=BaseExecutor.DEFAULT_LOCATION,\n        status=status,\n        baseline_rev=baseline_rev,\n    )\n    pidfile = os.path.join(\n        pid_dir,\n        \"workspace\",\n        f\"workspace{BaseExecutor.INFOFILE_EXT}\",\n    )\n    os.makedirs(os.path.dirname(pidfile), exist_ok=True)\n    (tmp_dir / pidfile).dump_json(info.asdict())\n    (tmp_dir / lock_file).dump_json(LOCK_CONTENTS)\n\n    mocker.patch.object(ProcessInfo, \"load\", return_value=mocker.Mock(pid=123))\n    mocker.patch(\"psutil.pid_exists\", return_value=pid_exists)\n\n    tempdir_active = mocker.spy(dvc.experiments.tempdir_queue, \"collect_active_data\")\n    celery_active = mocker.spy(dvc.experiments.celery_queue, \"collect_active_data\")\n    results = dvc.experiments.show()\n    assert results[1].dumpd() == {\n        \"rev\": ANY,\n        \"name\": \"master\",\n        \"data\": make_data(),\n        \"error\": None,\n        \"experiments\": [\n            {\n                \"revs\": ANY,\n                \"executor\": make_executor(state=\"running\"),\n                \"name\": ANY,\n            }\n        ]\n        if pid_exists\n        else None,\n    }\n    tempdir_active.assert_called_once()\n    celery_active.assert_called_once()\n\n\ndef test_show_with_broken_repo(tmp_dir, scm, dvc, exp_stage, caplog):\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    with open(\"dvc.yaml\", \"a\", encoding=\"utf-8\") as fd:\n        
fd.write(\"breaking the yaml!\")\n\n    results = dvc.experiments.show()\n    assert results[0].error\n    assert results[0].error.type == \"YAMLSyntaxError\"\n\n    for exp_range in results[1].experiments:\n        assert not any(exp.error for exp in exp_range)\n\n\ndef test_show_csv(tmp_dir, scm, dvc, exp_stage, capsys):\n    import time\n\n    baseline_rev = scm.get_rev()\n\n    def _get_rev_isotimestamp(rev):\n        return datetime.fromtimestamp(  # noqa: DTZ006\n            scm.resolve_commit(rev).commit_time\n        ).isoformat()\n\n    result1 = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    rev1 = first(result1)\n    ref_info1 = first(exp_refs_by_rev(scm, rev1))\n\n    # at least 1 second gap between these experiments to make sure\n    # the previous experiment to be regarded as branch_base\n    time.sleep(1)\n    result2 = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    rev2 = first(result2)\n    ref_info2 = first(exp_refs_by_rev(scm, rev2))\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--csv\"]) == 0\n    cap = capsys.readouterr()\n    data_dep = first(x for x in dvc.index.deps if \"copy.py\" in x.fspath)\n    data_hash = data_dep.hash_info.value[:7]\n    assert \"Experiment,rev,typ,Created,parent\" in cap.out\n    assert \"metrics.yaml:foo,params.yaml:foo,copy.py\" in cap.out\n    assert f\",workspace,baseline,,,3,3,{data_hash}\" in cap.out\n    assert (\n        \",master,baseline,{},,1,1,{}\".format(  # noqa: UP032\n            _get_rev_isotimestamp(baseline_rev), data_hash\n        )\n        in cap.out\n    )\n    assert (\n        f\"{ref_info1.name},{rev1[:7]},branch_base,{_get_rev_isotimestamp(rev1)},,2,2,{data_hash}\"\n        in cap.out\n    )\n    assert (\n        f\"{ref_info2.name},{rev2[:7]},branch_commit,{_get_rev_isotimestamp(rev2)},,3,3,{data_hash}\"\n        in cap.out\n    )\n\n\ndef test_show_only_changed(tmp_dir, dvc, scm, capsys, copy_script):\n    params_file = tmp_dir / 
\"params.yaml\"\n    params_data = {\"foo\": 1, \"goobar\": 1}\n    (tmp_dir / params_file).dump(params_data)\n\n    dvc.run(\n        cmd=\"python copy.py params.yaml metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\", \"goobar\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\"],\n    )\n    scm.add(\n        [\n            \"dvc.yaml\",\n            \"dvc.lock\",\n            \"copy.py\",\n            \"params.yaml\",\n            \"metrics.yaml\",\n            \".gitignore\",\n        ]\n    )\n    scm.commit(\"init\")\n\n    dvc.experiments.run(params=[\"foo=2\"])\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\"]) == 0\n    cap = capsys.readouterr()\n    assert \"goobar\" in cap.out\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--only-changed\"]) == 0\n    cap = capsys.readouterr()\n    assert \"goobar\" not in cap.out\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--only-changed\", \"--keep=.*bar\"]) == 0\n    cap = capsys.readouterr()\n    assert \"params.yaml:goobar\" in cap.out\n    assert \"metrics.yaml:goobar\" in cap.out\n\n\n@pytest.mark.vscode\ndef test_show_outs(tmp_dir, dvc, scm, erepo_dir, copy_script):\n    params_file = tmp_dir / \"params.yaml\"\n    params_data = {\"foo\": 1, \"bar\": 1}\n    (tmp_dir / params_file).dump(params_data)\n\n    dvc.run(\n        cmd=\"python copy.py params.yaml metrics.yaml && echo out > out\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\", \"bar\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\"],\n        outs=[\"out\"],\n    )\n\n    scm.commit(\"init\")\n\n    results = dvc.experiments.show()\n    assert results[0].dumpd() == {\n        \"rev\": \"workspace\",\n        \"name\": None,\n        \"data\": make_data(\n            params=ANY,\n            outs={\n                \"out\": {\n                    \"hash\": ANY,\n                    \"size\": ANY,\n                    
\"nfiles\": None,\n                    \"use_cache\": True,\n                    \"is_data_source\": False,\n                }\n            },\n        ),\n        \"error\": None,\n        \"experiments\": None,\n    }\n\n    tmp_dir.dvc_gen(\"out_add\", \"foo\", commit=\"dvc add output\")\n    results = dvc.experiments.show()\n    assert results[0].dumpd() == {\n        \"rev\": \"workspace\",\n        \"name\": None,\n        \"data\": make_data(\n            params=ANY,\n            outs={\n                \"out\": {\n                    \"hash\": ANY,\n                    \"size\": ANY,\n                    \"nfiles\": None,\n                    \"use_cache\": True,\n                    \"is_data_source\": False,\n                },\n                \"out_add\": {\n                    \"hash\": ANY,\n                    \"size\": ANY,\n                    \"nfiles\": None,\n                    \"use_cache\": True,\n                    \"is_data_source\": True,\n                },\n            },\n        ),\n        \"error\": None,\n        \"experiments\": None,\n    }\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"out\", \"out content\", commit=\"create out\")\n\n    dvc.imp(os.fspath(erepo_dir), \"out\", \"out_imported\")\n\n    results = dvc.experiments.show()\n    assert results[0].dumpd() == {\n        \"rev\": \"workspace\",\n        \"name\": None,\n        \"data\": make_data(\n            params=ANY,\n            outs={\n                \"out\": {\n                    \"hash\": ANY,\n                    \"size\": ANY,\n                    \"nfiles\": None,\n                    \"use_cache\": True,\n                    \"is_data_source\": False,\n                },\n                \"out_add\": {\n                    \"hash\": ANY,\n                    \"size\": ANY,\n                    \"nfiles\": None,\n                    \"use_cache\": True,\n                    \"is_data_source\": True,\n                },\n                
\"out_imported\": {\n                    \"hash\": ANY,\n                    \"size\": ANY,\n                    \"nfiles\": None,\n                    \"use_cache\": True,\n                    \"is_data_source\": True,\n                },\n            },\n        ),\n        \"error\": None,\n        \"experiments\": None,\n    }\n\n\ndef test_metrics_renaming(tmp_dir, dvc, scm, capsys, copy_script):\n    params_file = tmp_dir / \"params.yaml\"\n    params_data = {\"foo\": 1}\n    (tmp_dir / params_file).dump(params_data)\n\n    dvc.run(\n        cmd=\"python copy.py params.yaml metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\"],\n    )\n    scm.add(\n        [\n            \"dvc.yaml\",\n            \"dvc.lock\",\n            \"copy.py\",\n            \"params.yaml\",\n            \"metrics.yaml\",\n            \".gitignore\",\n        ]\n    )\n\n    scm.commit(\"metrics.yaml\")\n    metrics_rev = scm.get_rev()\n\n    dvc.run(\n        cmd=\"python copy.py params.yaml scores.yaml\",\n        metrics_no_cache=[\"scores.yaml\"],\n        params=[\"foo\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\"],\n    )\n    scm.add([\"dvc.yaml\", \"dvc.lock\", \"params.yaml\", \"scores.yaml\"])\n    scm.commit(\"scores.yaml\")\n    scores_rev = scm.get_rev()\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--csv\", \"-A\"]) == 0\n    cap = capsys.readouterr()\n\n    def _get_rev_isotimestamp(rev):\n        return datetime.fromtimestamp(  # noqa: DTZ006\n            scm.resolve_commit(rev).commit_time\n        ).isoformat()\n\n    assert f\",master,baseline,{_get_rev_isotimestamp(scores_rev)},,1,,1\" in cap.out\n    assert (\n        \",{},baseline,{},,,1,1\".format(  # noqa: UP032\n            metrics_rev[:7], _get_rev_isotimestamp(metrics_rev)\n        )\n        in cap.out\n    )\n\n\ndef test_show_sorted_deps(tmp_dir, dvc, scm, capsys):\n    
tmp_dir.gen(\"a\", \"a\")\n    tmp_dir.gen(\"b\", \"b\")\n    tmp_dir.gen(\"c\", \"c\")\n    tmp_dir.gen(\"z\", \"z\")\n\n    dvc.run(cmd=\"echo foo\", name=\"deps\", deps=[\"a\", \"b\", \"z\", \"c\"])\n\n    capsys.readouterr()\n    assert main([\"exp\", \"show\", \"--csv\"]) == 0\n    cap = capsys.readouterr()\n    assert \"a,b,c,z\" in cap.out\n\n\n@pytest.mark.vscode\ndef test_show_queued_error(tmp_dir, scm, dvc, exp_stage, mocker):\n    dvc.experiments.run(\n        exp_stage.addressing, params=[\"foo=2\"], queue=True, name=\"test_name\"\n    )\n    exp_rev_2 = dvc.experiments.scm.resolve_rev(f\"{CELERY_STASH}@{{0}}\")\n    commit_2 = scm.resolve_commit(exp_rev_2)\n\n    dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"], queue=True)\n    exp_rev_3 = dvc.experiments.scm.resolve_rev(f\"{CELERY_STASH}@{{0}}\")\n\n    def resolve_commit(rev):\n        if rev == exp_rev_3:\n            raise SCMError\n        return commit_2\n\n    mocker.patch.object(\n        scm,\n        \"resolve_commit\",\n        side_effect=mocker.MagicMock(side_effect=resolve_commit),\n    )\n\n    results = dvc.experiments.show()[1].experiments\n    assert len(results) == 2\n    queued = results[0]\n    assert queued.executor.state == \"queued\"\n    errored = results[1]\n    assert errored.revs[0].error\n\n\n@pytest.mark.vscode\ndef test_show_completed_error(tmp_dir, scm, dvc, exp_stage, mocker):\n    result_2 = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp_rev_2 = first(result_2)\n    commit_2 = scm.resolve_commit(exp_rev_2)\n    result_3 = dvc.experiments.run(exp_stage.addressing, params=[\"foo=3\"])\n    exp_rev_3 = first(result_3)\n\n    def resolve_commit(rev):\n        if rev == exp_rev_3:\n            raise SCMError\n        return commit_2\n\n    mocker.patch.object(\n        scm,\n        \"resolve_commit\",\n        side_effect=mocker.MagicMock(side_effect=resolve_commit),\n    )\n    results = dvc.experiments.show()[1].experiments\n    
assert len(results) == 1\n    assert not results[0].revs[0].error\n\n\n@pytest.mark.vscode\ndef test_show_baseline_error(tmp_dir, scm, dvc, exp_stage, mocker):\n    baseline_rev = scm.get_rev()\n\n    result_2 = dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"])\n    exp_rev_2 = first(result_2)\n    commit_2 = scm.resolve_commit(exp_rev_2)\n\n    def resolve_commit(rev):\n        if rev == baseline_rev:\n            raise SCMError\n        return commit_2\n\n    mocker.patch.object(\n        scm,\n        \"resolve_commit\",\n        side_effect=mocker.MagicMock(side_effect=resolve_commit),\n    )\n\n    results = dvc.experiments.show()\n    assert results[1].error\n    assert len(results[1].experiments) == 1\n"
  },
  {
    "path": "tests/func/experiments/test_stash_exp.py",
    "content": "import pytest\nfrom funcy import first\n\nfrom dvc.dependency.base import DependencyDoesNotExistError\nfrom dvc.exceptions import ReproductionError\n\n\n@pytest.mark.parametrize(\"tmp\", [True, False])\n@pytest.mark.parametrize(\"staged\", [True, False])\ndef test_deleted(tmp_dir, scm, dvc, tmp, staged):\n    tmp_dir.scm_gen(\"file\", \"file\", commit=\"commit file\")\n    stage = dvc.stage.add(cmd=\"cat file\", deps=[\"file\"], name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    file = tmp_dir / \"file\"\n    file.unlink()\n    if staged:\n        scm.add([\"file\"])\n\n    with pytest.raises(ReproductionError) as exc_info:\n        dvc.experiments.run(stage.addressing, tmp_dir=tmp)\n\n    cause = exc_info._excinfo[1].__cause__\n    assert isinstance(cause, DependencyDoesNotExistError)\n    assert not file.exists()\n\n\n@pytest.mark.parametrize(\"tmp\", [True, False])\n@pytest.mark.parametrize(\"staged\", [True, False])\ndef test_modified(tmp_dir, scm, dvc, caplog, tmp, staged):\n    tmp_dir.scm_gen(\"file\", \"file\", commit=\"commit file\")\n    stage = dvc.stage.add(cmd=\"cat file\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    (tmp_dir / \"file\").write_text(\"modified_file\")\n    if staged:\n        scm.add([\"file\"])\n\n    results = dvc.experiments.run(stage.addressing, tmp_dir=tmp)\n\n    exp = first(results)\n    scm.checkout(exp, force=True)\n    assert (tmp_dir / \"file\").read_text() == \"modified_file\"\n\n\n@pytest.mark.parametrize(\"tmp\", [True, False])\ndef test_staged_new_file(tmp_dir, scm, dvc, tmp):\n    stage = dvc.stage.add(cmd=\"cat file\", name=\"foo\")\n    scm.add_commit([\"dvc.yaml\"], message=\"add dvc.yaml\")\n\n    (tmp_dir / \"file\").write_text(\"file\")\n    scm.add([\"file\"])\n\n    results = dvc.experiments.run(stage.addressing, tmp_dir=tmp)\n    exp = first(results)\n    fs = scm.get_fs(exp)\n    assert fs.exists(\"file\")\n"
  },
  {
    "path": "tests/func/experiments/test_utils.py",
    "content": "from funcy import first\n\n\ndef test_generate_random_exp_name(tmp_dir, dvc, scm, exp_stage, mocker):\n    mocked_generator = mocker.MagicMock()\n    mocked_generator.choice.side_effect = [0, 0, 0, 0, 1, 1, 0, 0]\n    mocker.patch(\n        \"dvc.repo.experiments.utils.random.Random\", return_value=mocked_generator\n    )\n\n    ref = first(dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"]))\n    assert dvc.experiments.get_exact_name([ref])[ref] == \"0-0\"\n\n    # Causes 1 retry\n    ref = first(dvc.experiments.run(exp_stage.addressing, params=[\"foo=2\"]))\n    assert dvc.experiments.get_exact_name([ref])[ref] == \"1-1\"\n\n    tmp_dir.scm_gen({\"foo\": \"bar\"}, commit=\"foo\")\n    # Can use same name because of different baseline_rev\n    ref = first(dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"]))\n    assert dvc.experiments.get_exact_name([ref])[ref] == \"0-0\"\n"
  },
  {
    "path": "tests/func/metrics/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/metrics/test_diff.py",
    "content": "import json\nfrom os.path import join\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.testing import matchers as M\nfrom dvc.utils import relpath\nfrom dvc.utils.serialize import JSONFileCorruptedError\n\n\ndef test_metrics_diff_simple(tmp_dir, scm, dvc, run_copy_metrics):\n    def _gen(val):\n        tmp_dir.gen({\"m_temp.yaml\": str(val)})\n        run_copy_metrics(\n            \"m_temp.yaml\", \"m.yaml\", name=\"copy-metrics\", metrics=[\"m.yaml\"]\n        )\n        dvc.scm.commit(str(val))\n\n    _gen(1)\n    _gen(2)\n    _gen(3)\n\n    expected = {\"m.yaml\": {\"\": {\"old\": 1, \"new\": 3, \"diff\": 2}}}\n\n    assert dvc.metrics.diff(a_rev=\"HEAD~2\") == {\"diff\": expected}\n\n\ndef test_metrics_diff_yaml(tmp_dir, scm, dvc, run_copy_metrics):\n    def _gen(val):\n        metrics = {\"a\": {\"b\": {\"c\": val, \"d\": 1, \"e\": str(val)}}}\n        (tmp_dir / \"m_temp.yaml\").dump(metrics)\n        run_copy_metrics(\n            \"m_temp.yaml\",\n            \"m.yaml\",\n            name=\"copy-metrics\",\n            metrics=[\"m.yaml\"],\n            commit=str(val),\n        )\n\n    _gen(1)\n    _gen(2)\n    _gen(3)\n\n    expected = {\n        \"m.yaml\": {\n            \"a.b.e\": {\"old\": \"1\", \"new\": \"3\"},\n            \"a.b.c\": {\"old\": 1, \"new\": 3, \"diff\": 2},\n        }\n    }\n\n    assert dvc.metrics.diff(a_rev=\"HEAD~2\") == {\"diff\": expected}\n\n\ndef test_metrics_diff_json(tmp_dir, scm, dvc, run_copy_metrics):\n    def _gen(val):\n        metrics = {\"a\": {\"b\": {\"c\": val, \"d\": 1, \"e\": str(val)}}}\n        (tmp_dir / \"m_temp.json\").dump(metrics)\n        run_copy_metrics(\n            \"m_temp.json\",\n            \"m.json\",\n            name=\"copy-metrics\",\n            metrics=[\"m.json\"],\n            commit=str(val),\n        )\n\n    _gen(1)\n    _gen(2)\n    _gen(3)\n\n    expected = {\n        \"m.json\": {\n            \"a.b.e\": {\"old\": \"1\", \"new\": \"3\"},\n            
\"a.b.c\": {\"old\": 1, \"new\": 3, \"diff\": 2},\n        }\n    }\n    assert dvc.metrics.diff(a_rev=\"HEAD~2\") == {\"diff\": expected}\n\n\ndef test_metrics_diff_json_unchanged(tmp_dir, scm, dvc, run_copy_metrics):\n    def _gen(val):\n        metrics = {\"a\": {\"b\": {\"c\": val, \"d\": 1, \"e\": str(val)}}}\n        (tmp_dir / \"m_temp.json\").dump(metrics)\n        run_copy_metrics(\n            \"m_temp.json\",\n            \"m.json\",\n            name=\"copy-metrics\",\n            metrics=[\"m.json\"],\n            commit=str(val),\n        )\n\n    _gen(1)\n    _gen(2)\n    _gen(1)\n\n    assert dvc.metrics.diff(a_rev=\"HEAD~2\") == {}\n\n\ndef test_metrics_diff_broken_json(tmp_dir, scm, dvc, run_copy_metrics):\n    metrics = {\"a\": {\"b\": {\"c\": 1, \"d\": 1, \"e\": \"3\"}}}\n    (tmp_dir / \"m_temp.json\").dump(metrics)\n    run_copy_metrics(\n        \"m_temp.json\",\n        \"m.json\",\n        name=\"copy-metrics\",\n        metrics_no_cache=[\"m.json\"],\n        commit=\"add metrics\",\n    )\n\n    (tmp_dir / \"m.json\").write_text(json.dumps(metrics) + \"ma\\nlformed\\n\")\n\n    assert dvc.metrics.diff() == {\n        \"diff\": {\n            \"m.json\": {\n                \"a.b.e\": {\"old\": \"3\", \"new\": None},\n                \"a.b.c\": {\"old\": 1, \"new\": None},\n                \"a.b.d\": {\"old\": 1, \"new\": None},\n            }\n        },\n        \"errors\": {\"workspace\": {\"m.json\": M.instance_of(JSONFileCorruptedError)}},\n    }\n\n\ndef test_metrics_diff_no_metrics(tmp_dir, scm, dvc):\n    tmp_dir.scm_gen({\"foo\": \"foo\"}, commit=\"add foo\")\n    assert dvc.metrics.diff(a_rev=\"HEAD~1\") == {}\n\n\ndef test_metrics_diff_new_metric(tmp_dir, scm, dvc, run_copy_metrics):\n    metrics = {\"a\": {\"b\": {\"c\": 1, \"d\": 1, \"e\": \"3\"}}}\n    (tmp_dir / \"m_temp.json\").dump(metrics)\n    run_copy_metrics(\n        \"m_temp.json\", \"m.json\", name=\"copy-metrics\", metrics_no_cache=[\"m.json\"]\n    )\n\n    assert 
dvc.metrics.diff() == {\n        \"diff\": {\n            \"m.json\": {\n                \"a.b.e\": {\"old\": None, \"new\": \"3\"},\n                \"a.b.c\": {\"old\": None, \"new\": 1},\n                \"a.b.d\": {\"old\": None, \"new\": 1},\n            }\n        }\n    }\n\n\ndef test_metrics_diff_deleted_metric(tmp_dir, scm, dvc, run_copy_metrics):\n    metrics = {\"a\": {\"b\": {\"c\": 1, \"d\": 1, \"e\": \"3\"}}}\n    (tmp_dir / \"m_temp.json\").dump(metrics)\n    run_copy_metrics(\n        \"m_temp.json\",\n        \"m.json\",\n        name=\"copy-metrics\",\n        metrics_no_cache=[\"m.json\"],\n        commit=\"add metrics\",\n    )\n\n    (tmp_dir / \"m.json\").unlink()\n\n    assert dvc.metrics.diff() == {\n        \"diff\": {\n            \"m.json\": {\n                \"a.b.e\": {\"old\": \"3\", \"new\": None},\n                \"a.b.c\": {\"old\": 1, \"new\": None},\n                \"a.b.d\": {\"old\": 1, \"new\": None},\n            }\n        },\n        \"errors\": {\"workspace\": {\"m.json\": M.instance_of(FileNotFoundError)}},\n    }\n\n\ndef test_metrics_diff_with_unchanged(tmp_dir, scm, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metrics_temp.yaml\", \"foo: 1\\nxyz: 10\")\n    run_copy_metrics(\n        \"metrics_temp.yaml\",\n        \"metrics.yaml\",\n        name=\"copy-metrics\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        commit=\"1\",\n    )\n\n    tmp_dir.scm_gen(\"metrics.yaml\", \"foo: 2\\nxyz: 10\", commit=\"2\")\n    tmp_dir.scm_gen(\"metrics.yaml\", \"foo: 3\\nxyz: 10\", commit=\"3\")\n\n    assert dvc.metrics.diff(a_rev=\"HEAD~2\", all=True) == {\n        \"diff\": {\n            \"metrics.yaml\": {\n                \"foo\": {\"old\": 1, \"new\": 3, \"diff\": 2},\n                \"xyz\": {\"old\": 10, \"new\": 10, \"diff\": 0},\n            }\n        }\n    }\n\n\ndef test_no_commits(tmp_dir):\n    from dvc.repo import Repo\n    from dvc.scm import Git\n\n    git = Git.init(tmp_dir.fs_path)\n    assert 
git.no_commits\n\n    assert Repo.init().metrics.diff() == {}\n\n\ndef test_metrics_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics):\n    def _gen(val):\n        tmp_dir.gen({\"m_temp.yaml\": str(val)})\n        run_copy_metrics(\n            \"m_temp.yaml\", \"m.yaml\", name=\"copy-metrics\", metrics=[\"m.yaml\"]\n        )\n        dvc.scm.commit(str(val))\n\n    _gen(1)\n    _gen(2)\n    _gen(3)\n\n    tmp_dir.gen({\"m.yaml\": \"4\"})\n\n    expected = {\"m.yaml\": {\"\": {\"old\": 3, \"new\": 4, \"diff\": 1}}}\n\n    assert dvc.metrics.diff() == {\"diff\": expected}\n\n\ndef test_metrics_diff_cli(tmp_dir, scm, dvc, run_copy_metrics, caplog, capsys):\n    def _gen(val):\n        tmp_dir.gen({\"m_temp.yaml\": f\"foo: {val}\"})\n        run_copy_metrics(\n            \"m_temp.yaml\", \"m.yaml\", name=\"copy-metrics\", metrics=[\"m.yaml\"]\n        )\n        dvc.scm.commit(str(val))\n\n    _gen(1.23456789)\n    _gen(2.34567891011)\n    _gen(3.45678910111213)\n\n    caplog.clear()\n    capsys.readouterr()  # clearing the buffer\n    assert main([\"metrics\", \"diff\", \"HEAD~2\"]) == 0\n\n    captured = capsys.readouterr()\n\n    assert (\n        captured.out == \"Path    Metric    HEAD~2    workspace    Change\\n\"\n        \"m.yaml  foo       1.23457   3.45679      2.22222\\n\"\n    )\n\n\ndef test_metrics_diff_non_metrics(tmp_dir, scm, dvc):\n    def _gen(val):\n        tmp_dir.scm_gen({\"some_file.yaml\": f\"foo: {val}\"}, commit=str(val))\n\n    _gen(1)\n    _gen(2)\n    _gen(3)\n\n    result = dvc.metrics.diff(targets=[\"some_file.yaml\"], a_rev=\"HEAD~2\")\n    assert result == {\n        \"diff\": {\"some_file.yaml\": {\"foo\": {\"old\": 1, \"new\": 3, \"diff\": 2}}}\n    }\n\n\n@pytest.mark.parametrize(\n    \"dvcfile, metrics_file\",\n    [\n        (\"dvc.yaml\", \"my_metrics.yaml\"),\n        (\"dir/dvc.yaml\", \"my_metrics.yaml\"),\n        (\"dir/dvc.yaml\", join(\"..\", \"my_metrics.yaml\")),\n    ],\n)\ndef test_diff_top_level_metrics(tmp_dir, dvc, 
scm, dvcfile, metrics_file):\n    directory = (tmp_dir / dvcfile).parent\n    directory.mkdir(exist_ok=True)\n    (tmp_dir / dvcfile).dump({\"metrics\": [metrics_file]})\n\n    metrics_file = directory / metrics_file\n    metrics_file.dump({\"foo\": 3})\n    scm.add_commit([metrics_file, tmp_dir / dvcfile], message=\"add metrics\")\n\n    metrics_file.dump({\"foo\": 5})\n    assert dvc.metrics.diff() == {\n        \"diff\": {\n            relpath(directory / metrics_file): {\"foo\": {\"diff\": 2, \"new\": 5, \"old\": 3}}\n        }\n    }\n\n\ndef test_metrics_diff_active_branch_unchanged(tmp_dir, scm, dvc, run_copy_metrics):\n    def _gen(val):\n        metrics = {\"a\": {\"b\": {\"c\": val, \"d\": 1, \"e\": str(val)}}}\n        (tmp_dir / \"m_temp.yaml\").dump(metrics)\n        run_copy_metrics(\n            \"m_temp.yaml\",\n            \"m.yaml\",\n            name=\"copy-metric\",\n            metrics=[\"m.yaml\"],\n            commit=str(val),\n        )\n\n    _gen(1)\n    _gen(2)\n    _gen(1)\n\n    assert dvc.metrics.diff(a_rev=tmp_dir.scm.active_branch()) == {}\n"
  },
  {
    "path": "tests/func/metrics/test_show.py",
    "content": "import json\nimport os\nimport shutil\nfrom os.path import join\n\nimport pytest\nfrom funcy import get_in\n\nfrom dvc.cli import main\nfrom dvc.dvcfile import PROJECT_FILE\nfrom dvc.exceptions import OverlappingOutputPathsError\nfrom dvc.repo import Repo\nfrom dvc.repo.metrics.show import FileResult, Result\nfrom dvc.testing import matchers as M\nfrom dvc.utils.fs import remove\nfrom dvc.utils.serialize import JSONFileCorruptedError\nfrom dvc_data.index import DataIndexDirError\n\n\ndef test_show_simple(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metrics_t.yaml\", \"1.1\")\n    run_copy_metrics(\n        \"metrics_t.yaml\", \"metrics.yaml\", name=\"copy-metrics\", metrics=[\"metrics.yaml\"]\n    )\n    assert dvc.metrics.show() == {\"\": {\"data\": {\"metrics.yaml\": {\"data\": 1.1}}}}\n\n\ndef test_show_simple_from_subdir(tmp_dir, dvc, run_copy_metrics):\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n    tmp_dir.gen(\"metrics_t.yaml\", \"1.1\")\n    run_copy_metrics(\n        \"metrics_t.yaml\",\n        \"subdir/metrics.yaml\",\n        name=\"copy-metrics\",\n        metrics=[\"subdir/metrics.yaml\"],\n    )\n\n    expected_path = os.path.join(\"subdir\", \"metrics.yaml\")\n    assert dvc.metrics.show() == {\"\": {\"data\": {expected_path: {\"data\": 1.1}}}}\n\n    expected_path = os.path.join(\"subdir\", \"metrics.yaml\")\n    with subdir.chdir():\n        assert dvc.metrics.show() == {\"\": {\"data\": {expected_path: {\"data\": 1.1}}}}\n    subdir2 = tmp_dir / \"subdir2\"\n    subdir2.mkdir()\n\n    expected_path = os.path.join(\"subdir\", \"metrics.yaml\")\n    with subdir2.chdir():\n        assert dvc.metrics.show() == {\"\": {\"data\": {expected_path: {\"data\": 1.1}}}}\n\n\ndef test_show(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metrics_t.yaml\", \"foo: 1.1\")\n    run_copy_metrics(\n        \"metrics_t.yaml\", \"metrics.yaml\", name=\"copy-metrics\", metrics=[\"metrics.yaml\"]\n    )\n    assert 
dvc.metrics.show() == {\n        \"\": {\"data\": {\"metrics.yaml\": {\"data\": {\"foo\": 1.1}}}}\n    }\n\n\ndef test_show_toml(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metrics_t.toml\", \"[foo]\\nbar = 1.2\")\n    run_copy_metrics(\n        \"metrics_t.toml\", \"metrics.toml\", name=\"copy-metrics\", metrics=[\"metrics.toml\"]\n    )\n    assert dvc.metrics.show() == {\n        \"\": {\"data\": {\"metrics.toml\": {\"data\": {\"foo\": {\"bar\": 1.2}}}}}\n    }\n\n\ndef test_show_targets(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metrics_t.yaml\", \"foo: 1.1\")\n    run_copy_metrics(\n        \"metrics_t.yaml\", \"metrics.yaml\", name=\"copy-metrics\", metrics=[\"metrics.yaml\"]\n    )\n    expected = {\"\": {\"data\": {\"metrics.yaml\": {\"data\": {\"foo\": 1.1}}}}}\n    assert dvc.metrics.show(targets=[\"metrics.yaml\"]) == expected\n    assert dvc.metrics.show(targets=(tmp_dir / \"metrics.yaml\").fs_path) == expected\n\n\ndef test_show_multiple(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen(\"foo_temp\", \"foo: 1\\n\")\n    tmp_dir.gen(\"baz_temp\", \"baz: 2\\n\")\n    run_copy_metrics(\"foo_temp\", \"foo\", name=\"copy-to-foo\", metrics=[\"foo\"])\n    run_copy_metrics(\"baz_temp\", \"baz\", name=\"copy-to-baz\", metrics=[\"baz\"])\n    assert dvc.metrics.show() == {\n        \"\": {\"data\": {\"foo\": {\"data\": {\"foo\": 1}}, \"baz\": {\"data\": {\"baz\": 2}}}}\n    }\n\n\ndef test_show_branch(tmp_dir, scm, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metrics_temp.yaml\", \"foo: 1\")\n    run_copy_metrics(\n        \"metrics_temp.yaml\",\n        \"metrics.yaml\",\n        name=\"copy-metrics\",\n        metrics_no_cache=[\"metrics.yaml\"],\n    )\n    scm.add([\"metrics.yaml\", \"metrics.yaml.dvc\"])\n    scm.commit(\"init\")\n\n    with tmp_dir.branch(\"branch\", new=True):\n        tmp_dir.scm_gen(\"metrics.yaml\", \"foo: 2\", commit=\"branch\")\n\n    assert dvc.metrics.show(revs=[\"branch\"]) == {\n        \"workspace\": {\"data\": 
{\"metrics.yaml\": {\"data\": {\"foo\": 1}}}},\n        \"branch\": {\"data\": {\"metrics.yaml\": {\"data\": {\"foo\": 2}}}},\n    }\n\n\ndef test_show_subrepo_with_preexisting_tags(tmp_dir, scm):\n    tmp_dir.gen(\"foo\", \"foo\")\n    scm.add(\"foo\")\n    scm.commit(\"init\")\n    scm.tag(\"no-metrics\")\n\n    tmp_dir.gen({\"subdir\": {}})\n    subrepo_dir = tmp_dir / \"subdir\"\n    with subrepo_dir.chdir():\n        dvc = Repo.init(subdir=True)\n        scm.commit(\"init dvc\")\n\n        dvc.run(\n            cmd=\"echo foo: 1 > metrics.yaml\",\n            metrics=[\"metrics.yaml\"],\n            name=\"generate-metrics\",\n        )\n\n    scm.add(\n        [\n            str(subrepo_dir / \"metrics.yaml\"),\n            str(subrepo_dir / \"dvc.yaml\"),\n            str(subrepo_dir / \"dvc.lock\"),\n        ]\n    )\n    scm.commit(\"init metrics\")\n    scm.tag(\"v1\")\n\n    expected_path = \"metrics.yaml\"\n    assert dvc.metrics.show(all_tags=True) == {\n        \"workspace\": {\"data\": {expected_path: {\"data\": {\"foo\": 1}}}},\n        \"v1\": {\"data\": {expected_path: {\"data\": {\"foo\": 1}}}},\n    }\n\n\ndef test_missing_cache(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metrics_t.yaml\", \"1.1\")\n    run_copy_metrics(\n        \"metrics_t.yaml\", \"metrics.yaml\", name=\"copy-metrics\", metrics=[\"metrics.yaml\"]\n    )\n\n    # This one should be skipped\n    stage = run_copy_metrics(\n        \"metrics_t.yaml\",\n        \"metrics2.yaml\",\n        name=\"copy-metrics2\",\n        metrics=[\"metrics2.yaml\"],\n    )\n    remove(stage.outs[0].fspath)\n    remove(stage.outs[0].cache_path)\n\n    result = dvc.metrics.show()\n    metrics2 = result[\"\"][\"data\"].pop(\"metrics2.yaml\")\n    assert isinstance(metrics2[\"error\"], FileNotFoundError)\n    assert result == {\"\": {\"data\": {\"metrics.yaml\": {\"data\": 1.1}}}}\n\n\n@pytest.mark.parametrize(\"use_dvc\", [True, False])\ndef test_show_non_metric(tmp_dir, scm, use_dvc):\n    
tmp_dir.gen(\"metrics.yaml\", \"foo: 1.1\")\n\n    if use_dvc:\n        dvc = Repo.init()\n    else:\n        dvc = Repo(uninitialized=True)\n\n    assert dvc.metrics.show(targets=[\"metrics.yaml\"]) == {\n        \"\": {\"data\": {\"metrics.yaml\": {\"data\": {\"foo\": 1.1}}}}\n    }\n\n    if not use_dvc:\n        assert not (tmp_dir / \".dvc\").exists()\n\n\n@pytest.mark.parametrize(\"use_dvc\", [True, False])\ndef test_show_non_metric_branch(tmp_dir, scm, use_dvc):\n    tmp_dir.scm_gen(\"metrics.yaml\", \"foo: 1.1\", commit=\"init\")\n    with tmp_dir.branch(\"branch\", new=True):\n        tmp_dir.scm_gen(\"metrics.yaml\", \"foo: 2.2\", commit=\"other\")\n\n    if use_dvc:\n        dvc = Repo.init()\n    else:\n        dvc = Repo(uninitialized=True)\n\n    assert dvc.metrics.show(targets=[\"metrics.yaml\"], revs=[\"branch\"]) == {\n        \"workspace\": {\"data\": {\"metrics.yaml\": {\"data\": {\"foo\": 1.1}}}},\n        \"branch\": {\"data\": {\"metrics.yaml\": {\"data\": {\"foo\": 2.2}}}},\n    }\n\n    if not use_dvc:\n        assert not (tmp_dir / \".dvc\").exists()\n\n\ndef test_non_metric_and_dir_show(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen({\"metrics_t.yaml\": \"foo: 1.1\", \"metrics\": {\"metric1.yaml\": \"bar: 1.2\"}})\n\n    metric2 = os.fspath(tmp_dir / \"metrics\" / \"metric2.yaml\")\n    run_copy_metrics(\"metrics_t.yaml\", metric2, name=\"copy-metric2\", metrics=[metric2])\n\n    assert dvc.metrics.show(targets=[\"metrics_t.yaml\", \"metrics\"]) == {\n        \"\": {\n            \"data\": {\n                os.path.join(\"metrics\", \"metric1.yaml\"): {\"data\": {\"bar\": 1.2}},\n                os.path.join(\"metrics\", \"metric2.yaml\"): {\"data\": {\"foo\": 1.1}},\n                \"metrics_t.yaml\": {\"data\": {\"foo\": 1.1}},\n            }\n        }\n    }\n\n\ndef test_show_falsey(tmp_dir, dvc):\n    tmp_dir.gen(\"metrics.json\", '{\"foo\": 0, \"bar\": 0.0, \"baz\": {}}')\n    assert dvc.metrics.show(targets=[\"metrics.json\"]) 
== {\n        \"\": {\"data\": {\"metrics.json\": {\"data\": {\"foo\": 0, \"bar\": 0.0}}}}\n    }\n\n\ndef test_show_no_repo(tmp_dir):\n    tmp_dir.gen(\"metrics.json\", '{\"foo\": 0, \"bar\": 0.0, \"baz\": {}}')\n\n    dvc = Repo(uninitialized=True)\n\n    assert dvc.metrics.show(targets=[\"metrics.json\"]) == {\n        \"\": {\"data\": {\"metrics.json\": {\"data\": {\"foo\": 0, \"bar\": 0.0}}}}\n    }\n\n\ndef test_show_malformed_metric(tmp_dir, scm, dvc, caplog):\n    tmp_dir.gen(\"metric.json\", '{\"m\":1')\n\n    assert isinstance(\n        dvc.metrics.show(targets=[\"metric.json\"])[\"\"][\"data\"][\"metric.json\"][\"error\"],\n        JSONFileCorruptedError,\n    )\n\n\ndef test_metrics_show_no_target(tmp_dir, dvc, capsys):\n    assert dvc.metrics.show(targets=[\"metrics.json\"]) == {\n        \"\": {\"data\": {\"metrics.json\": {\"error\": M.instance_of(FileNotFoundError)}}}\n    }\n\n\ndef test_show_no_metrics_files(tmp_dir, dvc, caplog):\n    assert dvc.metrics.show() == {\"\": {\"data\": {}}}\n\n\n@pytest.mark.parametrize(\"clear_before_run\", [True, False])\n@pytest.mark.skip(reason=\"no longer raising graph errors\")\ndef test_metrics_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run):\n    data_dir = tmp_dir / \"data\"\n    data_dir.mkdir()\n\n    (data_dir / \"m1_temp.yaml\").dump({\"a\": {\"b\": {\"c\": 2, \"d\": 1}}})\n    run_copy_metrics(\n        str(data_dir / \"m1_temp.yaml\"),\n        str(data_dir / \"m1.yaml\"),\n        single_stage=False,\n        commit=\"add m1\",\n        name=\"cp-m1\",\n        metrics=[str(data_dir / \"m1.yaml\")],\n    )\n    with (tmp_dir / \"dvc.yaml\").modify() as d:\n        # trying to make an output overlaps error\n        d[\"stages\"][\"corrupted-stage\"] = {\"cmd\": \"mkdir data\", \"outs\": [\"data\"]}\n\n    # running by clearing and not clearing stuffs\n    # so as it works even for optimized cases\n    if clear_before_run:\n        remove(data_dir)\n        remove(dvc.cache.local.path)\n\n 
   dvc._reset()\n\n    res = dvc.metrics.show()\n    assert isinstance(res[\"\"][\"error\"], OverlappingOutputPathsError)\n\n\n@pytest.mark.parametrize(\n    \"file,error_path,err_type\",\n    [\n        (PROJECT_FILE, [\"workspace\", \"error\", \"type\"], \"YAMLSyntaxError\"),\n        (\n            \"metrics.yaml\",\n            [\"workspace\", \"data\", \"metrics.yaml\", \"error\", \"type\"],\n            \"YAMLFileCorruptedError\",\n        ),\n    ],\n)\ndef test_log_errors(\n    tmp_dir, scm, dvc, capsys, run_copy_metrics, file, error_path, err_type\n):\n    tmp_dir.gen(\"metrics_t.yaml\", \"m: 1.1\")\n    run_copy_metrics(\n        \"metrics_t.yaml\",\n        \"metrics.yaml\",\n        metrics=[\"metrics.yaml\"],\n        single_stage=False,\n        name=\"train\",\n    )\n    scm.tag(\"v1\")\n\n    with open(file, \"a\", encoding=\"utf-8\") as fd:\n        fd.write(\"\\nMALFORMED!\")\n\n    assert main([\"metrics\", \"show\", \"--all-tags\", \"--json\"]) == 0\n\n    out, error = capsys.readouterr()\n    result = json.loads(out)\n\n    assert get_in(result, error_path) == err_type\n    assert (\n        \"DVC failed to load some metrics for following revisions: 'workspace'.\" in error\n    )\n\n\ndef test_cached_metrics(tmp_dir, dvc, scm, remote):\n    tmp_dir.dvc_gen(\n        {\n            \"dir\": {\"metrics.yaml\": \"foo: 3\\nbar: 10\"},\n            \"dir2\": {\"metrics.yaml\": \"foo: 42\\nbar: 4\"},\n        }\n    )\n    dvc.push()\n    dvc.cache.local.clear()\n\n    (tmp_dir / \"dvc.yaml\").dump({\"metrics\": [\"dir/metrics.yaml\", \"dir2\"]})\n\n    assert dvc.metrics.show() == {\n        \"\": {\n            \"data\": {\n                join(\"dir\", \"metrics.yaml\"): {\"data\": {\"foo\": 3, \"bar\": 10}},\n                join(\"dir2\", \"metrics.yaml\"): {\"data\": {\"foo\": 42, \"bar\": 4}},\n            }\n        }\n    }\n\n\ndef test_top_level_parametrized(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"metrics.yaml\", \"foo: 3\\nbar: 10\")\n    
(tmp_dir / \"params.yaml\").dump({\"metric_file\": \"metrics.yaml\"})\n    (tmp_dir / \"dvc.yaml\").dump({\"metrics\": [\"${metric_file}\"]})\n    assert dvc.metrics.show() == {\n        \"\": {\"data\": {\"metrics.yaml\": {\"data\": {\"foo\": 3, \"bar\": 10}}}}\n    }\n\n\ndef test_metric_in_a_tracked_directory_with_missing_dir_file(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"dir\": {\"file\": \"2\"}})\n    (tmp_dir / \"dvc.yaml\").dump({\"metrics\": [join(\"dir\", \"file\")]})\n    shutil.rmtree(tmp_dir / \"dir\")  # remove from workspace\n    dvc.cache.local.clear()  # remove .dir file\n\n    assert dvc.metrics.show() == {\n        \"\": Result(\n            data={\n                join(\"dir\", \"file\"): FileResult(error=M.instance_of(DataIndexDirError)),\n            }\n        )\n    }\n"
  },
  {
    "path": "tests/func/params/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/params/test_diff.py",
    "content": "from os.path import join\n\nimport pytest\n\nfrom dvc.testing import matchers as M\nfrom dvc.utils import relpath\n\n\ndef test_diff_no_params(tmp_dir, scm, dvc):\n    assert dvc.params.diff() == {}\n\n\ndef test_diff_no_changes(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"bar\")\n    assert dvc.params.diff() == {}\n\n\ndef test_diff(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"bar\")\n\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: baz\", commit=\"baz\")\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: qux\", commit=\"qux\")\n\n    assert dvc.params.diff(a_rev=\"HEAD~2\") == {\n        \"diff\": {\"params.yaml\": {\"foo\": {\"old\": \"bar\", \"new\": \"qux\"}}}\n    }\n\n\ndef test_diff_dirty(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"bar\")\n\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: baz\", commit=\"baz\")\n    tmp_dir.gen(\"params.yaml\", \"foo: qux\")\n\n    assert dvc.params.diff() == {\n        \"diff\": {\"params.yaml\": {\"foo\": {\"old\": \"baz\", \"new\": \"qux\"}}}\n    }\n\n\ndef test_diff_new(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n\n    assert dvc.params.diff() == {\n        \"diff\": {\"params.yaml\": {\"foo\": {\"old\": None, \"new\": \"bar\"}}}\n    }\n\n\ndef test_diff_deleted(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    
scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"bar\")\n\n    (tmp_dir / \"params.yaml\").unlink()\n\n    assert dvc.params.diff() == {\n        \"diff\": {\"params.yaml\": {\"foo\": {\"old\": \"bar\", \"new\": None}}},\n        \"errors\": {\"workspace\": {\"params.yaml\": M.instance_of(FileNotFoundError)}},\n    }\n\n\ndef test_diff_list(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo:\\n- bar\\n- baz\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"foo\")\n\n    tmp_dir.gen(\"params.yaml\", \"foo:\\n- bar\\n- baz\\n- qux\")\n\n    assert dvc.params.diff() == {\n        \"diff\": {\n            \"params.yaml\": {\n                \"foo\": {\"old\": \"['bar', 'baz']\", \"new\": \"['bar', 'baz', 'qux']\"}\n            }\n        }\n    }\n\n\ndef test_diff_dict(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo:\\n  bar: baz\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"foo\")\n\n    tmp_dir.gen(\"params.yaml\", \"foo:\\n  bar: qux\")\n\n    assert dvc.params.diff() == {\n        \"diff\": {\"params.yaml\": {\"foo.bar\": {\"old\": \"baz\", \"new\": \"qux\"}}}\n    }\n\n\ndef test_diff_with_unchanged(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\\nxyz: val\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo,xyz\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"bar\")\n\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: baz\\nxyz: val\", commit=\"baz\")\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: qux\\nxyz: val\", commit=\"qux\")\n\n    assert dvc.params.diff(a_rev=\"HEAD~2\", all=True) == {\n        \"diff\": {\n            \"params.yaml\": {\n                \"foo\": {\"old\": \"bar\", \"new\": \"qux\"},\n                \"xyz\": {\"old\": \"val\", \"new\": \"val\"},\n            }\n     
   }\n    }\n\n\ndef test_pipeline_tracked_params(tmp_dir, scm, dvc, run_copy):\n    from dvc.dvcfile import PROJECT_FILE\n\n    tmp_dir.gen({\"foo\": \"foo\", \"params.yaml\": \"foo: bar\\nxyz: val\"})\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\", params=[\"foo,xyz\"])\n\n    scm.add([\"params.yaml\", PROJECT_FILE])\n    scm.commit(\"add stage\")\n\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: baz\\nxyz: val\", commit=\"baz\")\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: qux\\nxyz: val\", commit=\"qux\")\n\n    assert dvc.params.diff(a_rev=\"HEAD~2\") == {\n        \"diff\": {\"params.yaml\": {\"foo\": {\"old\": \"bar\", \"new\": \"qux\"}}}\n    }\n\n\ndef test_no_commits(tmp_dir):\n    from dvc.repo import Repo\n    from dvc.scm import Git\n\n    git = Git.init(tmp_dir.fs_path)\n    assert git.no_commits\n\n    assert Repo.init().params.diff() == {}\n\n\ndef test_vars_shows_on_params_diff(tmp_dir, scm, dvc):\n    params_file = tmp_dir / \"test_params.yaml\"\n    param_data = {\"vars\": {\"model1\": {\"epoch\": 15}, \"model2\": {\"epoch\": 35}}}\n    (tmp_dir / params_file).dump(param_data)\n    d = {\n        \"vars\": [\"test_params.yaml\"],\n        \"stages\": {\n            \"build\": {\n                \"foreach\": \"${vars}\",\n                \"do\": {\"cmd\": \"script --epoch ${item.epoch}\"},\n            }\n        },\n    }\n    (tmp_dir / \"dvc.yaml\").dump(d)\n    assert dvc.params.diff() == {\n        \"diff\": {\n            \"test_params.yaml\": {\n                \"vars.model1.epoch\": {\"new\": 15, \"old\": None},\n                \"vars.model2.epoch\": {\"new\": 35, \"old\": None},\n            }\n        }\n    }\n    scm.add([\"dvc.yaml\", \"test_params.yaml\"])\n    scm.commit(\"added stages\")\n\n    param_data[\"vars\"][\"model1\"][\"epoch\"] = 20\n    (tmp_dir / params_file).dump(param_data)\n    assert dvc.params.diff() == {\n        \"diff\": {\n            \"test_params.yaml\": {\"vars.model1.epoch\": {\"new\": 20, \"old\": 15, 
\"diff\": 5}}\n        }\n    }\n\n    data_dir = tmp_dir / \"data\"\n    data_dir.mkdir()\n    with data_dir.chdir():\n        assert dvc.params.diff() == {\n            \"diff\": {\n                str(params_file.relative_to(tmp_dir)): {\n                    \"vars.model1.epoch\": {\"new\": 20, \"old\": 15, \"diff\": 5}\n                }\n            }\n        }\n\n\ndef test_diff_targeted(tmp_dir, scm, dvc, run_copy):\n    from dvc.dvcfile import PROJECT_FILE\n\n    tmp_dir.gen(\n        {\n            \"foo\": \"foo\",\n            \"params.yaml\": \"foo: bar\",\n            \"other_params.yaml\": \"xyz: val\",\n        }\n    )\n    run_copy(\n        \"foo\",\n        \"bar\",\n        name=\"copy-foo-bar\",\n        params=[\"foo\", \"other_params.yaml:xyz\"],\n    )\n\n    scm.add([\"params.yaml\", \"other_params.yaml\", PROJECT_FILE])\n    scm.commit(\"add stage\")\n\n    tmp_dir.scm_gen(\n        {\"params.yaml\": \"foo: baz\", \"other_params.yaml\": \"xyz: val2\"},\n        commit=\"baz\",\n    )\n    tmp_dir.scm_gen(\n        {\"params.yaml\": \"foo: qux\", \"other_params.yaml\": \"xyz: val3\"},\n        commit=\"qux\",\n    )\n\n    assert dvc.params.diff(a_rev=\"HEAD~2\") == {\n        \"diff\": {\n            \"params.yaml\": {\"foo\": {\"old\": \"bar\", \"new\": \"qux\"}},\n            \"other_params.yaml\": {\"xyz\": {\"old\": \"val\", \"new\": \"val3\"}},\n        }\n    }\n\n    assert dvc.params.diff(a_rev=\"HEAD~2\", targets=[\"params.yaml\"]) == {\n        \"diff\": {\"params.yaml\": {\"foo\": {\"old\": \"bar\", \"new\": \"qux\"}}}\n    }\n\n    assert dvc.params.diff(a_rev=\"HEAD~2\", targets=[\"other_params.yaml\"]) == {\n        \"diff\": {\"other_params.yaml\": {\"xyz\": {\"old\": \"val\", \"new\": \"val3\"}}}\n    }\n\n\n@pytest.mark.parametrize(\"file\", [\"params.yaml\", \"other_params.yaml\"])\ndef test_diff_without_targets_specified(tmp_dir, dvc, scm, file):\n    params_file = tmp_dir / file\n    params_file.dump({\"foo\": 
{\"bar\": \"bar\"}, \"x\": \"0\"})\n    dvc.stage.add(name=\"test\", cmd=f\"echo {file}\", params=[{file: None}])\n    scm.add_commit([params_file, \"dvc.yaml\"], message=\"foo\")\n\n    params_file.dump({\"foo\": {\"bar\": \"baz\"}, \"y\": \"100\"})\n    assert dvc.params.diff() == {\n        \"diff\": {\n            file: {\n                \"foo.bar\": {\"new\": \"baz\", \"old\": \"bar\"},\n                \"x\": {\"new\": None, \"old\": \"0\"},\n                \"y\": {\"new\": \"100\", \"old\": None},\n            }\n        }\n    }\n\n\n@pytest.mark.parametrize(\n    \"dvcfile, params_file\",\n    [\n        (\"dvc.yaml\", \"my_params.yaml\"),\n        (\"dir/dvc.yaml\", \"my_params.yaml\"),\n        (\"dir/dvc.yaml\", join(\"..\", \"my_params.yaml\")),\n    ],\n)\ndef test_diff_top_level_params(tmp_dir, dvc, scm, dvcfile, params_file):\n    directory = (tmp_dir / dvcfile).parent\n    directory.mkdir(exist_ok=True)\n    (tmp_dir / dvcfile).dump({\"params\": [params_file]})\n\n    params_file = directory / params_file\n    params_file.dump({\"foo\": 3})\n    scm.add_commit([params_file, tmp_dir / dvcfile], message=\"add params\")\n\n    params_file.dump({\"foo\": 5})\n    assert dvc.params.diff() == {\n        \"diff\": {\n            relpath(directory / params_file): {\"foo\": {\"diff\": 2, \"new\": 5, \"old\": 3}}\n        }\n    }\n\n\ndef test_diff_active_branch_no_changes(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"bar\")\n    assert dvc.params.diff(a_rev=tmp_dir.scm.active_branch()) == {}\n"
  },
  {
    "path": "tests/func/params/test_show.py",
    "content": "import shutil\nfrom os.path import join\n\nimport pytest\n\nfrom dvc.dvcfile import PROJECT_FILE\nfrom dvc.repo import Repo\nfrom dvc.repo.metrics.show import FileResult, Result\nfrom dvc.testing import matchers as M\nfrom dvc_data.index import DataIndexDirError\n\n\ndef test_show_empty(dvc):\n    assert dvc.params.show() == {\"\": {\"data\": {}}}\n\n\ndef test_show(tmp_dir, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    assert dvc.params.show() == {\n        \"\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": \"bar\"}}}}\n    }\n\n\ndef test_show_targets(tmp_dir, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    expected = {\"\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": \"bar\"}}}}}\n    assert dvc.params.show(targets=[\"params.yaml\"]) == expected\n    assert dvc.params.show(targets=(tmp_dir / \"params.yaml\").fs_path) == expected\n\n\ndef test_show_toml(tmp_dir, dvc):\n    tmp_dir.gen(\"params.toml\", \"[foo]\\nbar = 42\\nbaz = [1, 2]\\n\")\n    dvc.run(cmd=\"echo params.toml\", params=[\"params.toml:foo\"], name=\"echo-params\")\n    assert dvc.params.show() == {\n        \"\": {\"data\": {\"params.toml\": {\"data\": {\"foo\": {\"bar\": 42, \"baz\": [1, 2]}}}}}\n    }\n\n\ndef test_show_py(tmp_dir, dvc):\n    tmp_dir.gen(\n        \"params.py\",\n        \"CONST = 1\\nIS_DIR: bool = True\\n\\n\\nclass Config:\\n    foo = 42\\n\",\n    )\n    dvc.run(\n        cmd=\"echo params.py\",\n        params=[\"params.py:CONST,IS_DIR,Config.foo\"],\n        name=\"echo-params\",\n    )\n    assert dvc.params.show() == {\n        \"\": {\n            \"data\": {\n                \"params.py\": {\n                    \"data\": {\"CONST\": 1, \"Config\": {\"foo\": 42}, \"IS_DIR\": True}\n                }\n            }\n        }\n    }\n\n\ndef 
test_show_multiple(tmp_dir, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\\nbaz: qux\\n\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params1\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"baz\"], name=\"echo-params2\")\n    assert dvc.params.show() == {\n        \"\": {\"data\": {\"params.yaml\": {\"data\": {\"baz\": \"qux\", \"foo\": \"bar\"}}}}\n    }\n\n\ndef test_show_list(tmp_dir, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo:\\n- bar\\n- baz\\n\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    assert dvc.params.show() == {\n        \"\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": [\"bar\", \"baz\"]}}}}\n    }\n\n\ndef test_show_branch(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: bar\")\n    dvc.run(cmd=\"echo params.yaml\", params=[\"foo\"], name=\"echo-params\")\n    scm.add([\"params.yaml\", \"Dvcfile\"])\n    scm.commit(\"init\")\n\n    with tmp_dir.branch(\"branch\", new=True):\n        tmp_dir.scm_gen(\"params.yaml\", \"foo: baz\", commit=\"branch\")\n\n    assert dvc.params.show(revs=[\"branch\"]) == {\n        \"branch\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": \"baz\"}}}},\n        \"workspace\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": \"bar\"}}}},\n    }\n\n\ndef test_pipeline_params(tmp_dir, scm, dvc, run_copy):\n    tmp_dir.gen({\"foo\": \"foo\", \"params.yaml\": \"foo: bar\\nxyz: val\\nabc: ignore\"})\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\", params=[\"foo,xyz\"])\n    scm.add([\"params.yaml\", PROJECT_FILE])\n    scm.commit(\"add stage\")\n\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: baz\\nxyz: val\\nabc: ignore\", commit=\"baz\")\n    tmp_dir.scm_gen(\"params.yaml\", \"foo: qux\\nxyz: val\\nabc: ignore\", commit=\"qux\")\n\n    assert dvc.params.show(revs=[\"master\"], deps_only=True) == {\n        \"master\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": \"qux\", \"xyz\": \"val\"}}}}\n    }\n    assert 
dvc.params.show(revs=[\"master\"]) == {\n        \"master\": {\n            \"data\": {\n                \"params.yaml\": {\"data\": {\"abc\": \"ignore\", \"foo\": \"qux\", \"xyz\": \"val\"}}\n            }\n        }\n    }\n\n\ndef test_show_no_repo(tmp_dir):\n    tmp_dir.gen({\"foo\": \"foo\", \"params_file.yaml\": \"foo: bar\\nxyz: val\"})\n\n    dvc = Repo(uninitialized=True)\n\n    assert dvc.params.show(targets=[\"params_file.yaml\"]) == {\n        \"\": {\"data\": {\"params_file.yaml\": {\"data\": {\"foo\": \"bar\", \"xyz\": \"val\"}}}}\n    }\n\n\n@pytest.mark.parametrize(\"file\", [\"params.yaml\", \"other_params.yaml\"])\ndef test_show_without_targets_specified(tmp_dir, dvc, scm, file):\n    params_file = tmp_dir / file\n    data = {\"foo\": {\"bar\": \"bar\"}, \"x\": \"0\"}\n    params_file.dump(data)\n    dvc.stage.add(name=\"test\", cmd=f\"echo {file}\", params=[{file: None}])\n\n    assert dvc.params.show() == {\"\": {\"data\": {file: {\"data\": data}}}}\n\n\ndef test_deps_multi_stage(tmp_dir, scm, dvc, run_copy):\n    tmp_dir.gen({\"foo\": \"foo\", \"params.yaml\": \"foo: bar\\nxyz: val\\nabc: ignore\"})\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\", params=[\"foo\"])\n    run_copy(\"foo\", \"bar1\", name=\"copy-foo-bar-1\", params=[\"xyz\"])\n\n    scm.add([\"params.yaml\", PROJECT_FILE])\n    scm.commit(\"add stage\")\n\n    assert dvc.params.show(revs=[\"master\"], deps_only=True) == {\n        \"master\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": \"bar\", \"xyz\": \"val\"}}}}\n    }\n\n\ndef test_deps_with_targets(tmp_dir, scm, dvc, run_copy):\n    tmp_dir.gen({\"foo\": \"foo\", \"params.yaml\": \"foo: bar\\nxyz: val\\nabc: ignore\"})\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\", params=[\"foo\"])\n    run_copy(\"foo\", \"bar1\", name=\"copy-foo-bar-1\", params=[\"xyz\"])\n\n    scm.add([\"params.yaml\", PROJECT_FILE])\n    scm.commit(\"add stage\")\n\n    assert dvc.params.show(targets=[\"params.yaml\"], deps_only=True) 
== {\n        \"\": {\n            \"data\": {\n                \"params.yaml\": {\"data\": {\"abc\": \"ignore\", \"foo\": \"bar\", \"xyz\": \"val\"}}\n            }\n        }\n    }\n\n\ndef test_cached_params(tmp_dir, dvc, scm, remote):\n    tmp_dir.dvc_gen(\n        {\n            \"dir\": {\"params.yaml\": \"foo: 3\\nbar: 10\"},\n            \"dir2\": {\"params.yaml\": \"foo: 42\\nbar: 4\"},\n        }\n    )\n    dvc.push()\n    dvc.cache.local.clear()\n\n    (tmp_dir / \"dvc.yaml\").dump({\"params\": [\"dir/params.yaml\", \"dir2\"]})\n\n    assert dvc.params.show() == {\n        \"\": {\n            \"data\": {\n                join(\"dir\", \"params.yaml\"): {\"data\": {\"foo\": 3, \"bar\": 10}},\n                join(\"dir2\", \"params.yaml\"): {\"data\": {\"foo\": 42, \"bar\": 4}},\n            }\n        }\n    }\n\n\ndef test_top_level_parametrized(tmp_dir, dvc):\n    (tmp_dir / \"param.json\").dump({\"foo\": 3, \"bar\": 10})\n    (tmp_dir / \"params.yaml\").dump({\"param_file\": \"param.json\"})\n    (tmp_dir / \"dvc.yaml\").dump({\"params\": [\"${param_file}\"]})\n    assert dvc.params.show() == {\n        \"\": {\n            \"data\": {\n                \"param.json\": {\"data\": {\"foo\": 3, \"bar\": 10}},\n                \"params.yaml\": {\"data\": {\"param_file\": \"param.json\"}},\n            }\n        }\n    }\n\n\ndef test_param_in_a_tracked_directory_with_missing_dir_file(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"dir\": {\"file\": \"2\"}})\n    (tmp_dir / \"dvc.yaml\").dump({\"params\": [join(\"dir\", \"file\")]})\n    shutil.rmtree(tmp_dir / \"dir\")  # remove from workspace\n    dvc.cache.local.clear()  # remove .dir file\n\n    assert dvc.params.show() == {\n        \"\": Result(\n            data={\n                join(\"dir\", \"file\"): FileResult(error=M.instance_of(DataIndexDirError)),\n            }\n        )\n    }\n"
  },
  {
    "path": "tests/func/parsing/__init__.py",
    "content": "from dvc.parsing import DataResolver, EntryDefinition, ForeachDefinition\nfrom dvc.parsing.context import Context\n\nTEMPLATED_DVC_YAML_DATA = {\n    \"stages\": {\n        \"stage1\": {\n            \"cmd\": \"python script.py ${dict.foo} --out ${dict.bar}\",\n            \"outs\": [\"${dict.bar}\"],\n            \"deps\": [\"${dict.foo}\"],\n            \"frozen\": \"${freeze}\",\n        },\n        \"stage2\": {\"cmd\": \"echo ${dict.foo} ${dict.bar}\"},\n    }\n}\n\nCONTEXT_DATA = {\n    \"dict\": {\"foo\": \"foo\", \"bar\": \"bar\"},\n    \"list\": [\"param1\", \"param2\"],\n    \"freeze\": True,\n}\n\nRESOLVED_DVC_YAML_DATA = {\n    \"stages\": {\n        \"stage1\": {\n            \"cmd\": \"python script.py foo --out bar\",\n            \"outs\": [\"bar\"],\n            \"deps\": [\"foo\"],\n            \"frozen\": True,\n        },\n        \"stage2\": {\"cmd\": \"echo foo bar\"},\n    }\n}\n\nUSED_VARS = {\n    \"stage1\": {\"dict.foo\": \"foo\", \"dict.bar\": \"bar\", \"freeze\": True},\n    \"stage2\": {\"dict.foo\": \"foo\", \"dict.bar\": \"bar\"},\n}\n\n\ndef make_entry_definition(wdir, name, data, context=None) -> EntryDefinition:\n    return EntryDefinition(\n        DataResolver(wdir.dvc, wdir.fs_path, {}),\n        context or Context(),\n        name,\n        data,\n    )\n\n\ndef make_foreach_def(\n    wdir, name, foreach_data, do_data=None, context=None\n) -> ForeachDefinition:\n    return ForeachDefinition(\n        DataResolver(wdir.dvc, wdir.fs_path, {}),\n        context or Context(),\n        name,\n        {\"foreach\": foreach_data, \"do\": do_data or {}},\n    )\n"
  },
  {
    "path": "tests/func/parsing/test_errors.py",
    "content": "\"\"\"Negative tests for the parametrization.\"\"\"\n\nimport logging\nimport re\n\nimport pytest\n\nfrom dvc.parsing import ResolveError\nfrom dvc.parsing.context import Context\nfrom dvc.parsing.interpolate import embrace\nfrom dvc.utils.humanize import join\n\nfrom . import make_entry_definition, make_foreach_def\n\n\ndef escape_ansi(line):\n    ansi_escape = re.compile(r\"(\\x9B|\\x1B\\[)[0-?]*[ -\\/]*[@-~]\")\n    return ansi_escape.sub(\"\", line)\n\n\n# Tests for the interpolated entries\n\n\n@pytest.mark.parametrize(\"vars_\", [\"${file}_params.yaml\", {\"foo\": \"${foo}\"}])\ndef test_vars_interpolation_errors(tmp_dir, dvc, vars_):\n    definition = make_entry_definition(tmp_dir, \"build\", {\"vars\": [vars_]})\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'stages.build.vars' in 'dvc.yaml': \"\n        \"interpolating is not allowed\"\n    )\n\n\ndef test_failed_to_interpolate(tmp_dir, dvc):\n    context = Context(models={\"foo\": \"bar\"})\n    definition = make_entry_definition(\n        tmp_dir, \"build\", {\"cmd\": \"echo ${models.foo.}\"}, context\n    )\n\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert (\n        escape_ansi(str(exc_info.value))\n        == \"failed to parse 'stages.build.cmd' in 'dvc.yaml':\\n\"\n        \"${models.foo.}\\n\"\n        \"            ^\\n\"\n        \"ParseException: Expected end of text, found '.'\"\n        \"  (at char 12), (line:1, col:13)\"\n    )\n    assert definition.context == {\"models\": {\"foo\": \"bar\"}}\n\n\ndef test_local_vars_params_file_not_exist(tmp_dir, dvc):\n    definition = make_entry_definition(\n        tmp_dir,\n        \"build\",\n        {\"vars\": [\"not_existing_params.yaml\"], \"cmd\": \"echo ${models.foo}\"},\n    )\n\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert (\n        
str(exc_info.value) == \"failed to parse stage 'build' in 'dvc.yaml': \"\n        \"'not_existing_params.yaml' does not exist\"\n    )\n    assert not definition.context\n\n\ndef test_specified_key_does_not_exist(tmp_dir, dvc):\n    definition = make_entry_definition(\n        tmp_dir,\n        \"build\",\n        {\"cmd\": \"echo ${models.foobar}\"},\n        Context(models={\"foo\": \"foo\"}),\n    )\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'stages.build.cmd' in 'dvc.yaml': \"\n        \"Could not find 'models.foobar'\"\n    )\n    assert definition.context == {\"models\": {\"foo\": \"foo\"}}\n\n\n@pytest.mark.parametrize(\n    \"wdir, expected_msg\",\n    [\n        (\"${models[foobar]}\", \" Could not find 'models.foobar'\"),\n        (\n            \"${models.foo]}\",\n            (\n                \"\\n${models.foo]}\\n\"\n                \"            ^\\n\"\n                \"ParseException: Expected end of text, found ']'\"\n                \"  (at char 12), (line:1, col:13)\"\n            ),\n        ),\n    ],\n)\ndef test_wdir_failed_to_interpolate(tmp_dir, dvc, wdir, expected_msg):\n    definition = make_entry_definition(\n        tmp_dir,\n        \"build\",\n        {\"wdir\": wdir, \"cmd\": \"echo ${models.bar}\"},\n        Context(models={\"bar\": \"bar\"}),\n    )\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert escape_ansi(str(exc_info.value)) == (\n        \"failed to parse 'stages.build.wdir' in 'dvc.yaml':\" + expected_msg\n    )\n    assert definition.context == {\"models\": {\"bar\": \"bar\"}}\n\n\ndef test_interpolate_non_string(tmp_dir, dvc):\n    definition = make_entry_definition(\n        tmp_dir, \"build\", {\"outs\": \"${models}\"}, Context(models={})\n    )\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert (\n        str(exc_info.value) == 
\"failed to parse 'stages.build.outs' in 'dvc.yaml':\\n\"\n        \"Cannot interpolate data of type 'dict'\"\n    )\n    assert definition.context == {\"models\": {}}\n\n\ndef test_interpolate_nested_iterable(tmp_dir, dvc):\n    definition = make_entry_definition(\n        tmp_dir,\n        \"build\",\n        {\"cmd\": \"echo ${models}\"},\n        Context(models={\"list\": [1, [2, 3]]}),\n    )\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'stages.build.cmd' in 'dvc.yaml':\\n\"\n        \"Cannot interpolate nested iterable in 'list'\"\n    )\n\n\ndef test_partial_vars_doesnot_exist(tmp_dir, dvc):\n    (tmp_dir / \"test_params.yaml\").dump({\"sub1\": \"sub1\", \"sub2\": \"sub2\"})\n\n    definition = make_entry_definition(\n        tmp_dir,\n        \"build\",\n        {\"vars\": [\"test_params.yaml:sub3\"], \"cmd\": \"echo ${sub1} ${sub2}\"},\n    )\n\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve()\n\n    assert (\n        str(exc_info.value) == \"failed to parse stage 'build' in 'dvc.yaml': \"\n        \"could not find 'sub3' in 'test_params.yaml'\"\n    )\n    assert not definition.context\n\n\n# Tests foreach generated stages and their error messages\n\n\ndef test_foreach_data_syntax_error(tmp_dir, dvc):\n    definition = make_foreach_def(tmp_dir, \"build\", \"${syntax.[error}\", {})\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve_all()\n\n    assert (\n        escape_ansi(str(exc_info.value))\n        == \"failed to parse 'stages.build.foreach' in 'dvc.yaml':\\n\"\n        \"${syntax.[error}\\n\"\n        \"        ^\\n\"\n        \"ParseException: Expected end of text, found '.'\"\n        \"  (at char 8), (line:1, col:9)\"\n    )\n\n\n@pytest.mark.parametrize(\"key\", [\"modelss\", \"modelss.123\"])\ndef test_foreach_data_key_does_not_exists(tmp_dir, dvc, key):\n    definition = 
make_foreach_def(tmp_dir, \"build\", embrace(key), {})\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve_all()\n    assert (\n        str(exc_info.value) == \"failed to parse 'stages.build.foreach' in 'dvc.yaml': \"\n        f\"Could not find '{key}'\"\n    )\n\n\n@pytest.mark.parametrize(\n    \"foreach_data\", [\"${foo}\", \"${dct.model1}\", \"${lst.0}\", \"foobar\"]\n)\ndef test_foreach_data_expects_list_or_dict(tmp_dir, dvc, foreach_data):\n    context = Context({\"foo\": \"bar\", \"dct\": {\"model1\": \"a-out\"}, \"lst\": [\"foo\", \"bar\"]})\n    definition = make_foreach_def(tmp_dir, \"build\", foreach_data, {}, context)\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve_all()\n    assert (\n        str(exc_info.value)\n        == \"failed to resolve 'stages.build.foreach' in 'dvc.yaml': \"\n        \"expected list/dictionary, got str\"\n    )\n\n\n@pytest.mark.parametrize(\n    \"global_data, where\",\n    [\n        ({\"item\": 10, \"key\": 10}, \"item and key are\"),\n        ({\"item\": 10}, \"item is\"),\n        ({\"key\": 5}, \"key is\"),\n    ],\n)\ndef test_foreach_overwriting_item_in_list(tmp_dir, dvc, caplog, global_data, where):\n    context = Context(global_data)\n    definition = make_foreach_def(\n        tmp_dir, \"build\", {\"model1\": 10, \"model2\": 5}, {}, context\n    )\n    with caplog.at_level(logging.WARNING, logger=\"dvc.parsing\"):\n        definition.resolve_all()\n\n    assert caplog.messages == [\n        f\"{where} already specified, \"\n        \"will be overwritten for stages generated from 'build'\"\n    ]\n\n\ndef test_foreach_do_syntax_errors(tmp_dir, dvc):\n    definition = make_foreach_def(\n        tmp_dir, \"build\", [\"foo\", \"bar\"], {\"cmd\": \"echo ${syntax.[error}\"}\n    )\n\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve_all()\n\n    assert (\n        escape_ansi(str(exc_info.value))\n        == \"failed to parse 
'stages.build.cmd' in 'dvc.yaml':\\n\"\n        \"${syntax.[error}\\n\"\n        \"        ^\\n\"\n        \"ParseException: Expected end of text, found '.'\"\n        \"  (at char 8), (line:1, col:9)\"\n    )\n\n\n@pytest.mark.parametrize(\n    \"key, loc\",\n    [\n        (\n            \"item.thresh\",  # the `thresh` in not available on model2`\n            \"stages.build@1.cmd\",\n        ),\n        (\"foo.bar\", \"stages.build@0.cmd\"),  # not available on any stages\n    ],\n)\ndef test_foreach_do_definition_item_does_not_exist(tmp_dir, dvc, key, loc):\n    context = Context(foo=\"bar\")\n    definition = make_foreach_def(\n        tmp_dir,\n        \"build\",\n        [{\"thresh\": \"10\"}, {}],\n        {\"cmd\": embrace(key)},\n        context,\n    )\n\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve_all()\n\n    assert (\n        str(exc_info.value)\n        == f\"failed to parse '{loc}' in 'dvc.yaml': Could not find '{key}'\"\n    )\n\n    # should have no `item` and `key` even though it failed to resolve.\n    assert context == {\"foo\": \"bar\"}\n\n\n@pytest.mark.parametrize(\n    \"redefine\",\n    [\n        {\"item\": 5},\n        {\"key\": 5},\n        {\"item\": 5, \"key\": 10},\n        {\"item\": {\"epochs\": 10}},\n    ],\n)\n@pytest.mark.parametrize(\"from_file\", [True, False])\ndef test_item_key_in_generated_stage_vars(tmp_dir, dvc, redefine, from_file):\n    context = Context(foo=\"bar\")\n    vars_ = [redefine]\n    if from_file:\n        (tmp_dir / \"test_params.yaml\").dump(redefine)\n        vars_ = [\"test_params.yaml\"]\n\n    definition = make_foreach_def(\n        tmp_dir,\n        \"build\",\n        {\"model1\": {\"thresh\": \"10\"}, \"model2\": {\"thresh\": 5}},\n        {\"vars\": vars_, \"cmd\": \"${item}\"},\n        context,\n    )\n\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve_all()\n\n    message = str(exc_info.value)\n    assert (\n        \"failed to 
parse stage 'build@model1' in 'dvc.yaml': \"\n        \"attempted to modify reserved\" in message\n    )\n\n    key_or_keys = \"keys\" if len(redefine) > 1 else \"key\"\n    assert f\"{key_or_keys} {join(redefine)}\" in message\n    if from_file:\n        assert \"in 'test_params.yaml'\" in message\n    assert context == {\"foo\": \"bar\"}\n\n\ndef test_foreach_wdir_key_does_not_exist(tmp_dir, dvc):\n    definition = make_foreach_def(\n        tmp_dir,\n        \"build\",\n        \"${models}\",\n        {\"wdir\": \"${ite}\", \"cmd\": \"echo ${item}\"},\n        Context(models=[\"foo\", \"bar\"]),\n    )\n    with pytest.raises(ResolveError) as exc_info:\n        definition.resolve_all()\n    assert (\n        str(exc_info.value)\n        == \"failed to parse 'stages.build@foo.wdir' in 'dvc.yaml': Could not find 'ite'\"\n    )\n    assert definition.context == {\"models\": [\"foo\", \"bar\"]}\n"
  },
  {
    "path": "tests/func/parsing/test_foreach.py",
    "content": "\"\"\"Testing happy paths for the foreach.\"\"\"\n\nimport os\n\nimport pytest\n\nfrom dvc.parsing import DEFAULT_PARAMS_FILE, DataResolver, ForeachDefinition\nfrom dvc.parsing.context import Context\n\n\ndef test_with_simple_list_data(tmp_dir, dvc):\n    \"\"\"Testing a simple non-nested list as a foreach data\"\"\"\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n\n    context = Context()\n    data = {\"foreach\": [\"foo\", \"bar\", \"baz\"], \"do\": {\"cmd\": \"echo ${item}\"}}\n    definition = ForeachDefinition(resolver, context, \"build\", data)\n\n    assert definition.resolve_one(\"foo\") == {\"build@foo\": {\"cmd\": \"echo foo\"}}\n    assert definition.resolve_one(\"bar\") == {\"build@bar\": {\"cmd\": \"echo bar\"}}\n    # check that `foreach` item-key replacement didnot leave any leftovers.\n    assert not context\n    assert not resolver.tracked_vars[\"build@foo\"]\n    assert not resolver.tracked_vars[\"build@bar\"]\n\n\ndef test_with_dict_data(tmp_dir, dvc):\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    context = Context()\n\n    foreach_data = {\"model1\": \"foo\", \"model2\": \"bar\"}\n    data = {\"foreach\": foreach_data, \"do\": {\"cmd\": \"echo ${key} ${item}\"}}\n    definition = ForeachDefinition(resolver, context, \"build\", data)\n\n    assert definition.resolve_one(\"model1\") == {\n        \"build@model1\": {\"cmd\": \"echo model1 foo\"}\n    }\n    assert definition.resolve_one(\"model2\") == {\n        \"build@model2\": {\"cmd\": \"echo model2 bar\"}\n    }\n\n    # check that `foreach` item-key replacement didnot leave any leftovers.\n    assert not context\n    assert not resolver.tracked_vars[\"build@model1\"]\n    assert not resolver.tracked_vars[\"build@model2\"]\n\n\ndef test_with_dict_with_non_str_keys(tmp_dir, dvc):\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    context = Context()\n\n    foreach_data = {2021: {\"thresh\": \"foo\"}, 2022: {\"thresh\": \"bar\"}}\n    data = 
{\"foreach\": foreach_data, \"do\": {\"cmd\": \"echo ${key} ${item.thresh}\"}}\n    definition = ForeachDefinition(resolver, context, \"build\", data)\n\n    assert definition.resolve_one(\"2021\") == {\"build@2021\": {\"cmd\": \"echo 2021 foo\"}}\n    assert definition.resolve_one(\"2022\") == {\"build@2022\": {\"cmd\": \"echo 2022 bar\"}}\n\n    # check that `foreach` item-key replacement didnot leave any leftovers.\n    assert not context\n    assert not resolver.tracked_vars[\"build@2021\"]\n    assert not resolver.tracked_vars[\"build@2022\"]\n\n\ndef test_with_composite_list(tmp_dir, dvc):\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n\n    context = Context()\n    foreach_data = [{\"thresh\": \"foo\"}, {\"thresh\": \"bar\"}]\n    data = {\"foreach\": foreach_data, \"do\": {\"cmd\": \"echo ${item.thresh}\"}}\n    definition = ForeachDefinition(resolver, context, \"build\", data)\n\n    assert definition.resolve_one(\"0\") == {\"build@0\": {\"cmd\": \"echo foo\"}}\n    # check that `foreach` item-key replacement didnot leave any leftovers.\n    assert not context\n\n    assert definition.resolve_one(\"1\") == {\"build@1\": {\"cmd\": \"echo bar\"}}\n    assert not context\n    assert not resolver.tracked_vars[\"build@0\"]\n\n\ndef test_foreach_interpolated_simple_list(tmp_dir, dvc):\n    foreach_data = [\"foo\", \"bar\", \"baz\"]\n    vars_ = {\"models\": foreach_data}\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {\"vars\": [vars_]})\n    data = {\"foreach\": \"${models}\", \"do\": {\"cmd\": \"echo ${item}\"}}\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@foo\": {\"cmd\": \"echo foo\"},\n        \"build@bar\": {\"cmd\": \"echo bar\"},\n        \"build@baz\": {\"cmd\": \"echo baz\"},\n    }\n    assert resolver.context == {\"models\": foreach_data}\n    assert not any(item for item in 
resolver.tracked_vars.values())\n\n\n@pytest.mark.parametrize(\"foreach_def\", [\"${item.thresh}\", \"${item[thresh]}\"])\n@pytest.mark.parametrize(\n    \"foreach_data, result\",\n    [\n        (\n            {\"model1\": {\"thresh\": \"foo\"}, \"model2\": {\"thresh\": \"bar\"}},\n            {\n                \"build@model1\": {\"cmd\": \"echo foo\"},\n                \"build@model2\": {\"cmd\": \"echo bar\"},\n            },\n        ),\n        (\n            [{\"thresh\": \"foo\"}, {\"thresh\": \"bar\"}],\n            {\"build@0\": {\"cmd\": \"echo foo\"}, \"build@1\": {\"cmd\": \"echo bar\"}},\n        ),\n    ],\n)\ndef test_foreach_interpolate_with_composite_data(\n    tmp_dir, dvc, foreach_def, foreach_data, result\n):\n    vars_ = [{\"models\": foreach_data}]\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {\"vars\": vars_})\n    data = {\"foreach\": \"${models}\", \"do\": {\"cmd\": f\"echo {foreach_def}\"}}\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == result\n    assert resolver.context == {\"models\": foreach_data}\n    assert not any(item for item in resolver.tracked_vars.values())\n\n\ndef test_params_file_with_dict_tracked(tmp_dir, dvc):\n    foreach_data = {\"model1\": {\"thresh\": \"foo\"}, \"model2\": {\"thresh\": \"bar\"}}\n    params = {\"models\": foreach_data}\n    (tmp_dir / \"params.yaml\").dump(params)\n\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    data = {\"foreach\": \"${models}\", \"do\": {\"cmd\": \"echo ${item.thresh}\"}}\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@model1\": {\"cmd\": \"echo foo\"},\n        \"build@model2\": {\"cmd\": \"echo bar\"},\n    }\n    # check that `foreach` item-key replacement didnot leave any leftovers.\n    assert resolver.context == {\"models\": foreach_data}\n    assert resolver.tracked_vars == {\n        
\"build@model1\": {\"params.yaml\": {\"models.model1.thresh\": \"foo\"}},\n        \"build@model2\": {\"params.yaml\": {\"models.model2.thresh\": \"bar\"}},\n    }\n\n\ndef test_params_file_tracked_for_composite_list(tmp_dir, dvc):\n    foreach_data = [{\"thresh\": \"foo\"}, {\"thresh\": \"bar\"}]\n    params = {\"models\": foreach_data}\n    (tmp_dir / \"params.yaml\").dump(params)\n\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    data = {\"foreach\": \"${models}\", \"do\": {\"cmd\": \"echo ${item.thresh}\"}}\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@0\": {\"cmd\": \"echo foo\"},\n        \"build@1\": {\"cmd\": \"echo bar\"},\n    }\n    assert resolver.context == {\"models\": foreach_data}\n    assert resolver.tracked_vars == {\n        \"build@0\": {\"params.yaml\": {\"models.0.thresh\": \"foo\"}},\n        \"build@1\": {\"params.yaml\": {\"models.1.thresh\": \"bar\"}},\n    }\n\n\ndef test_foreach_data_from_nested_vars(tmp_dir, dvc):\n    vars_ = {\"models\": {\"lst\": [{\"thresh\": 10}, {\"thresh\": 15}]}}\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {\"vars\": [vars_]})\n    data = {\"foreach\": \"${models.lst}\", \"do\": {\"cmd\": \"echo ${item.thresh}\"}}\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@0\": {\"cmd\": \"echo 10\"},\n        \"build@1\": {\"cmd\": \"echo 15\"},\n    }\n    assert resolver.context == vars_\n    assert not any(item for item in resolver.tracked_vars.values())\n\n\ndef test_foreach_partial_interpolations(tmp_dir, dvc):\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {\"vars\": [{\"bar\": \"bar\"}]})\n    foreach_data = {\"model1\": \"foo\", \"model2\": \"${bar}\"}\n    data = {\"foreach\": foreach_data, \"do\": {\"cmd\": \"echo ${item}\"}}\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", 
data)\n\n    assert definition.resolve_all() == {\n        \"build@model1\": {\"cmd\": \"echo foo\"},\n        \"build@model2\": {\"cmd\": \"echo bar\"},\n    }\n    assert resolver.context == {\"bar\": \"bar\"}\n    assert not any(item for item in resolver.tracked_vars.values())\n\n\ndef test_mixed_vars_for_foreach_data(tmp_dir, dvc):\n    (tmp_dir / \"params.yaml\").dump({\"models\": {\"model1\": \"foo\"}})\n    (tmp_dir / \"test_params.yaml\").dump({\"models\": {\"model2\": \"bar\"}})\n\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {\"vars\": [\"test_params.yaml\"]})\n    data = {\"foreach\": \"${models}\", \"do\": {\"cmd\": \"echo ${item}\"}}\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@model1\": {\"cmd\": \"echo foo\"},\n        \"build@model2\": {\"cmd\": \"echo bar\"},\n    }\n    assert resolver.context == {\"models\": {\"model1\": \"foo\", \"model2\": \"bar\"}}\n    assert resolver.tracked_vars == {\n        \"build@model1\": {\"params.yaml\": {\"models.model1\": \"foo\"}},\n        \"build@model2\": {\"test_params.yaml\": {\"models.model2\": \"bar\"}},\n    }\n\n\ndef test_mixed_vars_for_foreach_data_2(tmp_dir, dvc):\n    (tmp_dir / \"params.yaml\").dump(\n        {\"models\": {\"model1\": {\"thresh\": 10}, \"model2\": {\"thresh\": 15}}},\n    )\n    (tmp_dir / \"test_params.yaml\").dump(\n        {\"models\": {\"model1\": {\"epochs\": 5}, \"model2\": {\"epochs\": 10}}},\n    )\n\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {\"vars\": [\"test_params.yaml\"]})\n    data = {\n        \"foreach\": \"${models}\",\n        \"do\": {\"cmd\": \"echo ${item.thresh} ${item.epochs}\"},\n    }\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@model1\": {\"cmd\": \"echo 10 5\"},\n        \"build@model2\": {\"cmd\": \"echo 15 10\"},\n    }\n    assert resolver.context == 
{\n        \"models\": {\n            \"model1\": {\"thresh\": 10, \"epochs\": 5},\n            \"model2\": {\"thresh\": 15, \"epochs\": 10},\n        }\n    }\n    assert resolver.tracked_vars == {\n        \"build@model1\": {\n            \"params.yaml\": {\"models.model1.thresh\": 10},\n            \"test_params.yaml\": {\"models.model1.epochs\": 5},\n        },\n        \"build@model2\": {\n            \"params.yaml\": {\"models.model2.thresh\": 15},\n            \"test_params.yaml\": {\"models.model2.epochs\": 10},\n        },\n    }\n\n\ndef test_foreach_with_interpolated_wdir(tmp_dir, dvc):\n    resolver = DataResolver(dvc, (tmp_dir / \"data\").fs_path, {})\n    foreach_data = [\"foo\", \"bar\"]\n    data = {\n        \"foreach\": foreach_data,\n        \"do\": {\"wdir\": \"${item}\", \"cmd\": \"echo hello\"},\n    }\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        # note that the resolver generates `wdir` relative to file's wdir\n        # so, this is just `foo`, not `data/foo`.\n        # figuring out `wdir` is the responsibility of the `load_stage`/`Stage`\n        \"build@foo\": {\"wdir\": \"foo\", \"cmd\": \"echo hello\"},\n        \"build@bar\": {\"wdir\": \"bar\", \"cmd\": \"echo hello\"},\n    }\n\n    assert not resolver.context\n    assert not any(item for item in resolver.tracked_vars.values())\n\n\ndef test_foreach_with_local_vars(tmp_dir, dvc):\n    resolver = DataResolver(dvc, (tmp_dir / \"data\").fs_path, {})\n    foreach_data = [\"foo\", \"bar\"]\n    data = {\n        \"foreach\": foreach_data,\n        \"do\": {\n            \"vars\": [{\"foobar\": \"foobar\"}],\n            \"cmd\": \"echo ${item} ${foobar}\",\n        },\n    }\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        # note that the resolver generates `wdir` relative to file's wdir\n        # so, this is just 
`foo`, not `data/foo`.\n        # figuring out `wdir` is the responsibility of the `load_stage`/`Stage`\n        \"build@foo\": {\"cmd\": \"echo foo foobar\"},\n        \"build@bar\": {\"cmd\": \"echo bar foobar\"},\n    }\n    assert not resolver.context\n    assert not any(item for item in resolver.tracked_vars.values())\n\n\n@pytest.mark.parametrize(\n    \"local_import\",\n    [\n        \"test_params.yaml\",\n        \"test_params.yaml:train\",\n        \"test_params.yaml:train,prepare\",\n    ],\n)\ndef test_foreach_with_imported_vars(tmp_dir, dvc, local_import):\n    (tmp_dir / \"params.yaml\").dump({\"models\": {\"model1\": {\"thresh\": \"foo\"}}})\n    (tmp_dir / \"test_params.yaml\").dump(\n        {\"train\": {\"epochs\": 10}, \"prepare\": {\"nums\": 25}}\n    )\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    foreach_data = [\"foo\", \"bar\"]\n    data = {\n        \"foreach\": foreach_data,\n        \"do\": {\"vars\": [local_import], \"cmd\": \"echo ${item} ${train.epochs}\"},\n    }\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        # note that the resolver generates `wdir` relative to file's wdir\n        # so, this is just `foo`, not `data/foo`.\n        # figuring out `wdir` is the responsibility of the `load_stage`/`Stage`\n        \"build@foo\": {\"cmd\": \"echo foo 10\"},\n        \"build@bar\": {\"cmd\": \"echo bar 10\"},\n    }\n\n    assert resolver.context == {\"models\": {\"model1\": {\"thresh\": \"foo\"}}}\n    assert resolver.tracked_vars == {\n        \"build@foo\": {\"test_params.yaml\": {\"train.epochs\": 10}},\n        \"build@bar\": {\"test_params.yaml\": {\"train.epochs\": 10}},\n    }\n\n\n@pytest.mark.parametrize(\"local_import\", [\"params.yaml\", \"params.yaml:train,prepare\"])\ndef test_foreach_with_interpolated_wdir_and_local_vars(tmp_dir, dvc, local_import):\n    (tmp_dir / \"params.yaml\").dump({\"models\": {\"model1\": {\"thresh\": 
\"foo\"}}})\n\n    for i in range(5):\n        build_dir = tmp_dir / (\"model-\" + str(i))\n        build_dir.mkdir()\n        (build_dir / \"params.yaml\").dump(\n            {\"train\": {\"epochs\": 1 + i}, \"prepare\": {\"nums\": 10 * i}},\n        )\n\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    data = {\n        \"foreach\": [0, 1, 2, 3, 4],\n        \"do\": {\n            \"wdir\": \"model-${item}\",\n            \"vars\": [local_import],\n            \"cmd\": \"echo ${item} ${train.epochs} ${prepare.nums}\",\n        },\n    }\n    definition = ForeachDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        # note that the resolver generates `wdir` relative to file's wdir\n        # so, this is just `foo`, not `data/foo`.\n        # figuring out `wdir` is the responsibility of the `load_stage`/`Stage`\n        \"build@0\": {\"wdir\": \"model-0\", \"cmd\": \"echo 0 1 0\"},\n        \"build@1\": {\"wdir\": \"model-1\", \"cmd\": \"echo 1 2 10\"},\n        \"build@2\": {\"wdir\": \"model-2\", \"cmd\": \"echo 2 3 20\"},\n        \"build@3\": {\"wdir\": \"model-3\", \"cmd\": \"echo 3 4 30\"},\n        \"build@4\": {\"wdir\": \"model-4\", \"cmd\": \"echo 4 5 40\"},\n    }\n\n    assert resolver.context == {\"models\": {\"model1\": {\"thresh\": \"foo\"}}}\n    assert resolver.tracked_vars == {\n        \"build@0\": {\n            os.path.join(\"model-0\", \"params.yaml\"): {\n                \"train.epochs\": 1,\n                \"prepare.nums\": 0,\n            }\n        },\n        \"build@1\": {\n            os.path.join(\"model-1\", \"params.yaml\"): {\n                \"train.epochs\": 2,\n                \"prepare.nums\": 10,\n            }\n        },\n        \"build@2\": {\n            os.path.join(\"model-2\", \"params.yaml\"): {\n                \"train.epochs\": 3,\n                \"prepare.nums\": 20,\n            }\n        },\n        \"build@3\": {\n            
os.path.join(\"model-3\", \"params.yaml\"): {\n                \"train.epochs\": 4,\n                \"prepare.nums\": 30,\n            }\n        },\n        \"build@4\": {\n            os.path.join(\"model-4\", \"params.yaml\"): {\n                \"train.epochs\": 5,\n                \"prepare.nums\": 40,\n            }\n        },\n    }\n    assert resolver.context.imports == {DEFAULT_PARAMS_FILE: None}\n\n\ndef test_foreach_do_syntax_is_checked_once(tmp_dir, dvc, mocker):\n    do_def = {\"cmd\": \"python script.py --epochs ${item}\"}\n    data = {\"foreach\": [0, 1, 2, 3, 4], \"do\": do_def}\n    definition = ForeachDefinition(\n        DataResolver(dvc, tmp_dir.fs_path, {}), Context(), \"build\", data\n    )\n    mock = mocker.patch(\"dvc.parsing.check_syntax_errors\", return_value=True)\n    definition.resolve_all()\n\n    mock.assert_called_once_with(do_def, \"build\", \"dvc.yaml\")\n\n\ndef test_foreach_data_is_only_resolved_once(tmp_dir, dvc, mocker):\n    context = Context(models=[\"foo\", \"bar\", \"baz\"])\n    data = {\"foreach\": \"${models}\", \"do\": {}}\n    definition = ForeachDefinition(\n        DataResolver(dvc, tmp_dir.fs_path, {}), context, \"build\", data\n    )\n    mock = mocker.spy(definition, \"_resolve_foreach_data\")\n\n    definition.resolve_all()\n\n    mock.assert_called_once_with()\n"
  },
  {
    "path": "tests/func/parsing/test_interpolated_entry.py",
    "content": "import os\nfrom copy import deepcopy\n\nimport pytest\n\nfrom dvc.dependency import _merge_params\nfrom dvc.parsing import DEFAULT_PARAMS_FILE, DataResolver\nfrom dvc.parsing.context import recurse_not_a_node\nfrom dvc.parsing.interpolate import escape_str\n\nfrom . import CONTEXT_DATA, RESOLVED_DVC_YAML_DATA, TEMPLATED_DVC_YAML_DATA, USED_VARS\n\n\ndef assert_stage_equal(d1, d2):\n    \"\"\"Keeps the params section in order, and then checks for equality.\"\"\"\n    for d in [d1, d2]:\n        assert recurse_not_a_node(d)\n        for _, stage_d in d.get(\"stages\", {}).items():\n            params = _merge_params(stage_d.get(\"params\", []))\n            for k in params:\n                params[k] = sorted(params[k])\n            if params:\n                stage_d[\"params\"] = params\n    assert d1 == d2\n\n\ndef test_simple(tmp_dir, dvc):\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(CONTEXT_DATA)\n    resolver = DataResolver(dvc, tmp_dir.fs_path, deepcopy(TEMPLATED_DVC_YAML_DATA))\n    assert_stage_equal(resolver.resolve(), deepcopy(RESOLVED_DVC_YAML_DATA))\n    assert resolver.tracked_vars == {\n        \"stage1\": {DEFAULT_PARAMS_FILE: USED_VARS[\"stage1\"]},\n        \"stage2\": {DEFAULT_PARAMS_FILE: USED_VARS[\"stage2\"]},\n    }\n\n\ndef test_vars_import(tmp_dir, dvc):\n    \"\"\"\n    Test that different file can be loaded using `vars`\n    instead of default params.yaml.\n    \"\"\"\n    (tmp_dir / \"params2.yaml\").dump(CONTEXT_DATA)\n    d = deepcopy(TEMPLATED_DVC_YAML_DATA)\n    d[\"vars\"] = [\"params2.yaml\"]\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n\n    resolved_data = deepcopy(RESOLVED_DVC_YAML_DATA)\n    assert_stage_equal(resolver.resolve(), resolved_data)\n    assert resolver.tracked_vars == {\n        \"stage1\": {\"params2.yaml\": USED_VARS[\"stage1\"]},\n        \"stage2\": {\"params2.yaml\": USED_VARS[\"stage2\"]},\n    }\n\n\ndef test_vars_and_params_import(tmp_dir, dvc):\n    \"\"\"\n    Test that vars and 
params are both merged together for interpolation,\n    whilst tracking the \"used\" variables from params.\n    \"\"\"\n    d = {\n        \"vars\": [DEFAULT_PARAMS_FILE, {\"dict\": {\"foo\": \"foobar\"}}],\n        \"stages\": {\"stage1\": {\"cmd\": \"echo ${dict.foo} ${dict.bar}\"}},\n    }\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump({\"dict\": {\"bar\": \"bar\"}})\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n\n    assert_stage_equal(\n        resolver.resolve(), {\"stages\": {\"stage1\": {\"cmd\": \"echo foobar bar\"}}}\n    )\n    assert resolver.tracked_vars == {\n        \"stage1\": {DEFAULT_PARAMS_FILE: {\"dict.bar\": \"bar\"}}\n    }\n\n\ndef test_stage_with_wdir(tmp_dir, dvc):\n    \"\"\"\n    Test that params file from wdir are also loaded\n    \"\"\"\n    d = {\n        \"stages\": {\n            \"stage1\": {\n                \"cmd\": \"echo ${dict.foo} ${dict.bar}\",\n                \"params\": [\"value1\"],\n                \"wdir\": \"data\",\n                \"vars\": [DEFAULT_PARAMS_FILE],\n            }\n        }\n    }\n\n    data_dir = tmp_dir / \"data\"\n    data_dir.mkdir()\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump({\"dict\": {\"bar\": \"bar\"}})\n    (data_dir / DEFAULT_PARAMS_FILE).dump({\"dict\": {\"foo\": \"foo\"}})\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n\n    assert_stage_equal(\n        resolver.resolve(),\n        {\n            \"stages\": {\n                \"stage1\": {\n                    \"cmd\": \"echo foo bar\",\n                    \"wdir\": \"data\",\n                    \"params\": [\"value1\"],\n                }\n            }\n        },\n    )\n    assert resolver.tracked_vars == {\n        \"stage1\": {\n            os.path.join(\"data\", DEFAULT_PARAMS_FILE): {\"dict.foo\": \"foo\"},\n            DEFAULT_PARAMS_FILE: {\"dict.bar\": \"bar\"},\n        }\n    }\n\n\ndef test_with_templated_wdir(tmp_dir, dvc):\n    \"\"\"\n    Test that params from the resolved wdir are still loaded\n    and is 
used in the interpolation.\n    \"\"\"\n    d = {\n        \"stages\": {\n            \"stage1\": {\n                \"cmd\": \"echo ${dict.foo} ${dict.bar}\",\n                \"params\": [\"value1\"],\n                \"wdir\": \"${dict.ws}\",\n                \"vars\": [DEFAULT_PARAMS_FILE],\n            }\n        }\n    }\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump({\"dict\": {\"bar\": \"bar\", \"ws\": \"data\"}})\n    data_dir = tmp_dir / \"data\"\n    data_dir.mkdir()\n    (data_dir / DEFAULT_PARAMS_FILE).dump({\"dict\": {\"foo\": \"foo\"}})\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n\n    assert_stage_equal(\n        resolver.resolve(),\n        {\n            \"stages\": {\n                \"stage1\": {\n                    \"cmd\": \"echo foo bar\",\n                    \"wdir\": \"data\",\n                    \"params\": [\"value1\"],\n                }\n            }\n        },\n    )\n    assert resolver.tracked_vars == {\n        \"stage1\": {\n            os.path.join(\"data\", DEFAULT_PARAMS_FILE): {\"dict.foo\": \"foo\"},\n            DEFAULT_PARAMS_FILE: {\"dict.bar\": \"bar\", \"dict.ws\": \"data\"},\n        }\n    }\n    assert resolver.context.imports == {\"params.yaml\": None}\n    assert resolver.context == {\"dict\": {\"bar\": \"bar\", \"ws\": \"data\"}}\n\n\ndef test_resolve_local_tries_to_load_globally_used_files(tmp_dir, dvc):\n    iterable = {\"bar\": \"bar\", \"foo\": \"foo\"}\n    (tmp_dir / \"params.json\").dump(iterable)\n\n    d = {\n        \"vars\": [\"params.json\"],\n        \"stages\": {\n            \"build\": {\n                \"cmd\": \"command --value ${bar}\",\n                \"params\": [{\"params.json\": [\"foo\"]}],\n                \"vars\": [\"params.json\"],\n            }\n        },\n    }\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n    assert_stage_equal(\n        resolver.resolve(),\n        {\n            \"stages\": {\n                \"build\": {\n                    \"cmd\": \"command 
--value bar\",\n                    \"params\": [{\"params.json\": [\"foo\"]}],\n                }\n            }\n        },\n    )\n    assert resolver.tracked_vars == {\"build\": {\"params.json\": {\"bar\": \"bar\"}}}\n\n\ndef test_resolve_local_tries_to_load_globally_used_params_yaml(tmp_dir, dvc):\n    iterable = {\"bar\": \"bar\", \"foo\": \"foo\"}\n    (tmp_dir / \"params.yaml\").dump(iterable)\n\n    d = {\n        \"stages\": {\n            \"build\": {\n                \"cmd\": \"command --value ${bar}\",\n                \"params\": [{\"params.yaml\": [\"foo\"]}],\n                \"vars\": [\"params.yaml\"],\n            }\n        }\n    }\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n    assert_stage_equal(\n        resolver.resolve(),\n        {\n            \"stages\": {\n                \"build\": {\n                    \"cmd\": \"command --value bar\",\n                    \"params\": [{\"params.yaml\": [\"foo\"]}],\n                }\n            }\n        },\n    )\n    assert resolver.tracked_vars == {\"build\": {\"params.yaml\": {\"bar\": \"bar\"}}}\n\n\ndef test_vars_relpath_overwrite(tmp_dir, dvc):\n    iterable = {\"bar\": \"bar\", \"foo\": \"foo\"}\n    (tmp_dir / \"params.yaml\").dump(iterable)\n    d = {\n        \"vars\": [\"params.yaml\"],\n        \"stages\": {\n            \"build\": {\n                \"wdir\": \"data\",\n                \"cmd\": \"echo ${bar}\",\n                \"vars\": [\"../params.yaml\"],\n            }\n        },\n    }\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n    resolver.resolve()\n    assert resolver.context.imports == {\"params.yaml\": None}\n\n\n@pytest.mark.parametrize(\"local\", [True, False])\n@pytest.mark.parametrize(\n    \"vars_\",\n    [\n        [\"test_params.yaml:bar\", \"test_params.yaml:foo\"],\n        [\"test_params.yaml:foo,bar\"],\n        [\"test_params.yaml\"],\n        [\"test_params.yaml\", \"test_params.yaml\"],\n    ],\n)\ndef test_vars_load_partial(tmp_dir, 
dvc, local, vars_):\n    iterable = {\"bar\": \"bar\", \"foo\": \"foo\"}\n    (tmp_dir / \"test_params.yaml\").dump(iterable)\n    d = {\"stages\": {\"build\": {\"cmd\": \"echo ${bar}\"}}}\n    if local:\n        d[\"stages\"][\"build\"][\"vars\"] = vars_\n    else:\n        d[\"vars\"] = vars_\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n    resolver.resolve()\n\n\n@pytest.mark.parametrize(\n    \"bool_config, list_config\",\n    [(None, None), (\"store_true\", \"nargs\"), (\"boolean_optional\", \"append\")],\n)\ndef test_cmd_dict(tmp_dir, dvc, bool_config, list_config):\n    with dvc.config.edit() as conf:\n        if bool_config:\n            conf[\"parsing\"][\"bool\"] = bool_config\n        if list_config:\n            conf[\"parsing\"][\"list\"] = list_config\n\n    string = \"spaced string\"\n    mixed_quote_string = \"quote\\\"'d\"\n    data = {\n        \"dict\": {\n            \"foo\": \"foo\",\n            \"bar\": 2,\n            \"string\": string,\n            \"mixed_quote_string\": mixed_quote_string,\n            \"bool\": True,\n            \"bool-false\": False,\n            \"list\": [1, 2, \"foo\", mixed_quote_string],\n            \"nested\": {\"foo\": \"foo\"},\n        }\n    }\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(data)\n    resolver = DataResolver(\n        dvc,\n        tmp_dir.fs_path,\n        {\"stages\": {\"stage1\": {\"cmd\": \"python script.py ${dict}\"}}},\n    )\n\n    if bool_config is None or bool_config == \"store_true\":\n        bool_resolved = \" --bool\"\n    else:\n        bool_resolved = \" --bool --no-bool-false\"\n\n    if list_config is None or list_config == \"nargs\":\n        list_resolved = f\" --list 1 2 foo {escape_str(mixed_quote_string)}\"\n    else:\n        list_resolved = \" --list 1 --list 2 --list foo\"\n        list_resolved += f\" --list {escape_str(mixed_quote_string)}\"\n\n    assert_stage_equal(\n        resolver.resolve(),\n        {\n            \"stages\": {\n                
\"stage1\": {\n                    \"cmd\": (\n                        \"python script.py\"\n                        \" --foo foo --bar 2\"\n                        f\" --string {escape_str(string)}\"\n                        \" --mixed_quote_string\"\n                        f\" {escape_str(mixed_quote_string)}\"\n                        f\"{bool_resolved}\"\n                        f\"{list_resolved}\"\n                        \" --nested.foo foo\"\n                    )\n                }\n            }\n        },\n    )\n"
  },
  {
    "path": "tests/func/parsing/test_matrix.py",
    "content": "import pytest\n\nfrom dvc.parsing import DataResolver, MatrixDefinition\n\nMATRIX_DATA = {\n    \"os\": [\"win\", \"linux\"],\n    \"pyv\": [3.7, 3.8],\n    \"dict\": [{\"arg1\": 1}, {\"arg2\": 2}],\n    \"list\": [[\"out1\", \"out11\"], [\"out2\", \"out22\"]],\n}\n\n\n@pytest.mark.parametrize(\n    \"matrix\",\n    [\n        MATRIX_DATA,\n        {\n            \"os\": \"${os}\",\n            \"pyv\": \"${pyv}\",\n            \"dict\": \"${dict}\",\n            \"list\": \"${list}\",\n        },\n    ],\n)\ndef test_matrix_interpolated(tmp_dir, dvc, matrix):\n    (tmp_dir / \"params.yaml\").dump(MATRIX_DATA)\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    data = {\n        \"matrix\": matrix,\n        \"cmd\": \"echo ${item.os} ${item.pyv} ${item.dict}\"\n        \" -- ${item.list.0} ${item.list.1}\",\n    }\n    definition = MatrixDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@win-3.7-dict0-list0\": {\"cmd\": \"echo win 3.7 --arg1 1 -- out1 out11\"},\n        \"build@win-3.7-dict0-list1\": {\"cmd\": \"echo win 3.7 --arg1 1 -- out2 out22\"},\n        \"build@win-3.7-dict1-list0\": {\"cmd\": \"echo win 3.7 --arg2 2 -- out1 out11\"},\n        \"build@win-3.7-dict1-list1\": {\"cmd\": \"echo win 3.7 --arg2 2 -- out2 out22\"},\n        \"build@win-3.8-dict0-list0\": {\"cmd\": \"echo win 3.8 --arg1 1 -- out1 out11\"},\n        \"build@win-3.8-dict0-list1\": {\"cmd\": \"echo win 3.8 --arg1 1 -- out2 out22\"},\n        \"build@win-3.8-dict1-list0\": {\"cmd\": \"echo win 3.8 --arg2 2 -- out1 out11\"},\n        \"build@win-3.8-dict1-list1\": {\"cmd\": \"echo win 3.8 --arg2 2 -- out2 out22\"},\n        \"build@linux-3.7-dict0-list0\": {\"cmd\": \"echo linux 3.7 --arg1 1 -- out1 out11\"},\n        \"build@linux-3.7-dict0-list1\": {\"cmd\": \"echo linux 3.7 --arg1 1 -- out2 out22\"},\n        \"build@linux-3.7-dict1-list0\": {\"cmd\": \"echo linux 3.7 --arg2 2 -- out1 out11\"},\n 
       \"build@linux-3.7-dict1-list1\": {\"cmd\": \"echo linux 3.7 --arg2 2 -- out2 out22\"},\n        \"build@linux-3.8-dict0-list0\": {\"cmd\": \"echo linux 3.8 --arg1 1 -- out1 out11\"},\n        \"build@linux-3.8-dict0-list1\": {\"cmd\": \"echo linux 3.8 --arg1 1 -- out2 out22\"},\n        \"build@linux-3.8-dict1-list0\": {\"cmd\": \"echo linux 3.8 --arg2 2 -- out1 out11\"},\n        \"build@linux-3.8-dict1-list1\": {\"cmd\": \"echo linux 3.8 --arg2 2 -- out2 out22\"},\n    }\n\n\n@pytest.mark.parametrize(\n    \"matrix\",\n    [\n        MATRIX_DATA,\n        {\n            \"os\": \"${os}\",\n            \"pyv\": \"${pyv}\",\n            \"dict\": \"${dict}\",\n            \"list\": \"${list}\",\n        },\n    ],\n)\ndef test_matrix_key_present(tmp_dir, dvc, matrix):\n    (tmp_dir / \"params.yaml\").dump(MATRIX_DATA)\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    data = {\n        \"matrix\": matrix,\n        \"cmd\": \"echo ${key}\",\n    }\n    definition = MatrixDefinition(resolver, resolver.context, \"build\", data)\n\n    assert definition.resolve_all() == {\n        \"build@win-3.7-dict0-list0\": {\"cmd\": \"echo win-3.7-dict0-list0\"},\n        \"build@win-3.7-dict0-list1\": {\"cmd\": \"echo win-3.7-dict0-list1\"},\n        \"build@win-3.7-dict1-list0\": {\"cmd\": \"echo win-3.7-dict1-list0\"},\n        \"build@win-3.7-dict1-list1\": {\"cmd\": \"echo win-3.7-dict1-list1\"},\n        \"build@win-3.8-dict0-list0\": {\"cmd\": \"echo win-3.8-dict0-list0\"},\n        \"build@win-3.8-dict0-list1\": {\"cmd\": \"echo win-3.8-dict0-list1\"},\n        \"build@win-3.8-dict1-list0\": {\"cmd\": \"echo win-3.8-dict1-list0\"},\n        \"build@win-3.8-dict1-list1\": {\"cmd\": \"echo win-3.8-dict1-list1\"},\n        \"build@linux-3.7-dict0-list0\": {\"cmd\": \"echo linux-3.7-dict0-list0\"},\n        \"build@linux-3.7-dict0-list1\": {\"cmd\": \"echo linux-3.7-dict0-list1\"},\n        \"build@linux-3.7-dict1-list0\": {\"cmd\": \"echo 
linux-3.7-dict1-list0\"},\n        \"build@linux-3.7-dict1-list1\": {\"cmd\": \"echo linux-3.7-dict1-list1\"},\n        \"build@linux-3.8-dict0-list0\": {\"cmd\": \"echo linux-3.8-dict0-list0\"},\n        \"build@linux-3.8-dict0-list1\": {\"cmd\": \"echo linux-3.8-dict0-list1\"},\n        \"build@linux-3.8-dict1-list0\": {\"cmd\": \"echo linux-3.8-dict1-list0\"},\n        \"build@linux-3.8-dict1-list1\": {\"cmd\": \"echo linux-3.8-dict1-list1\"},\n    }\n"
  },
  {
    "path": "tests/func/parsing/test_resolver.py",
    "content": "from copy import deepcopy\n\nimport pytest\n\nfrom dvc.parsing import DEFAULT_PARAMS_FILE, DataResolver, ResolveError\nfrom dvc.parsing.context import Context\nfrom dvc.utils.serialize import dumps_yaml\n\nfrom . import CONTEXT_DATA, RESOLVED_DVC_YAML_DATA, TEMPLATED_DVC_YAML_DATA\n\nDATA = {\"models\": {\"bar\": \"bar\", \"foo\": \"foo\"}}\n\n\ndef test_resolver(tmp_dir, dvc):\n    resolver = DataResolver(dvc, tmp_dir.fs_path, TEMPLATED_DVC_YAML_DATA)\n    resolver.context.merge_update(Context(CONTEXT_DATA))\n    assert resolver.resolve() == RESOLVED_DVC_YAML_DATA\n\n\ndef test_default_params_file_not_exist(tmp_dir, dvc):\n    d = {\"vars\": [DATA[\"models\"]]}\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n    assert resolver.context == d[\"vars\"][0]\n\n\ndef test_no_params_yaml_and_vars(tmp_dir, dvc):\n    resolver = DataResolver(dvc, tmp_dir.fs_path, {})\n    assert not resolver.context\n\n\ndef test_local_vars(tmp_dir, dvc):\n    resolver = DataResolver(\n        dvc, tmp_dir.fs_path, {\"vars\": [{\"foo\": \"bar\", \"bar\": \"foo\"}]}\n    )\n    assert resolver.context == {\"foo\": \"bar\", \"bar\": \"foo\"}\n\n\n@pytest.mark.parametrize(\"vars_\", [\"${file}_params.yaml\", {\"foo\": \"${foo}\"}])\ndef test_vars_interpolation_errors(tmp_dir, dvc, vars_):\n    with pytest.raises(ResolveError) as exc_info:\n        DataResolver(dvc, tmp_dir.fs_path, {\"vars\": [vars_, {\"bar\": \"foo\"}]})\n    assert (\n        str(exc_info.value)\n        == \"failed to parse 'vars' in 'dvc.yaml': interpolating is not allowed\"\n    )\n\n\n@pytest.mark.parametrize(\"vars_\", [{}, {\"vars\": []}, {\"vars\": [DEFAULT_PARAMS_FILE]}])\ndef test_default_params_file(tmp_dir, dvc, vars_):\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(DATA)\n    resolver = DataResolver(dvc, tmp_dir.fs_path, vars_)\n    assert resolver.context == DATA\n\n\ndef test_load_vars_from_file(tmp_dir, dvc):\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(DATA)\n\n    datasets = {\"datasets\": 
[\"foo\", \"bar\"]}\n    (tmp_dir / \"params.json\").dump(datasets)\n    d = {\"vars\": [DEFAULT_PARAMS_FILE, \"params.json\"]}\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n\n    expected = deepcopy(DATA)\n    expected.update(datasets)\n    assert resolver.context == expected\n\n\ndef test_load_vars_with_relpath(tmp_dir, scm, dvc):\n    tmp_dir.scm_gen(DEFAULT_PARAMS_FILE, dumps_yaml(DATA), commit=\"add params\")\n\n    revisions = [\"HEAD\", \"workspace\"]\n    for rev in dvc.brancher(revs=[\"HEAD\"]):\n        assert rev == revisions.pop()\n        d = {\"vars\": [f\"../{DEFAULT_PARAMS_FILE}\"]}\n        resolver = DataResolver(dvc, \"subdir\", d)\n        assert resolver.context == deepcopy(DATA)\n\n\ndef test_partial_vars_doesnot_exist(tmp_dir, dvc):\n    (tmp_dir / \"test_params.yaml\").dump({\"sub1\": \"sub1\"})\n\n    with pytest.raises(ResolveError) as exc_info:\n        DataResolver(dvc, tmp_dir.fs_path, {\"vars\": [\"test_params.yaml:sub2\"]})\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'vars' in 'dvc.yaml': \"\n        \"could not find 'sub2' in 'test_params.yaml'\"\n    )\n\n\ndef test_global_overwrite_error_on_imports(tmp_dir, dvc):\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(DATA)\n    (tmp_dir / \"params.json\").dump(DATA)\n\n    d = {\"vars\": [DEFAULT_PARAMS_FILE, \"params.json\"]}\n    with pytest.raises(ResolveError) as exc_info:\n        DataResolver(dvc, tmp_dir.fs_path, d)\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'vars' in 'dvc.yaml':\\n\"\n        \"cannot redefine 'models.bar' from 'params.json' \"\n        \"as it already exists in 'params.yaml'\"\n    )\n\n\ndef test_global_overwrite_vars(tmp_dir, dvc):\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(DATA)\n    d = {\"vars\": [DATA]}\n\n    with pytest.raises(ResolveError) as exc_info:\n        DataResolver(dvc, tmp_dir.fs_path, d)\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'vars' in 'dvc.yaml':\\n\"\n        \"cannot 
redefine 'models.bar' from 'vars[0]' \"\n        \"as it already exists in 'params.yaml'\"\n    )\n\n\ndef test_local_declared_vars_overwrite(tmp_dir, dvc):\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(DATA)\n\n    d = {\"vars\": [DATA[\"models\"], DATA[\"models\"]]}\n    with pytest.raises(ResolveError) as exc_info:\n        DataResolver(dvc, tmp_dir.fs_path, d)\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'vars' in 'dvc.yaml':\\n\"\n        \"cannot redefine 'bar' from 'vars[1]' \"\n        \"as it already exists in 'vars[0]'\"\n    )\n\n\ndef test_specified_params_file_not_exist(tmp_dir, dvc):\n    d = {\"vars\": [\"not_existing_params.yaml\"]}\n    with pytest.raises(ResolveError) as exc_info:\n        DataResolver(dvc, tmp_dir.fs_path, d)\n\n    assert (\n        str(exc_info.value) == \"failed to parse 'vars' in 'dvc.yaml': \"\n        \"'not_existing_params.yaml' does not exist\"\n    )\n\n\n@pytest.mark.parametrize(\"local\", [True, False])\n@pytest.mark.parametrize(\n    \"vars_\",\n    [\n        [\"test_params.yaml\", \"test_params.yaml:sub1\"],\n        [\"test_params.yaml:sub1\", \"test_params.yaml\"],\n        [\"test_params.yaml:sub1\", \"test_params.yaml:sub1,sub2\"],\n    ],\n)\ndef test_vars_already_loaded_message(tmp_dir, dvc, local, vars_):\n    d = {\"stages\": {\"build\": {\"cmd\": \"echo ${sub1} ${sub2}\"}}}\n    (tmp_dir / \"test_params.yaml\").dump({\"sub1\": \"sub1\", \"sub2\": \"sub2\"})\n    if not local:\n        d[\"vars\"] = vars_\n    else:\n        d[\"stages\"][\"build\"][\"vars\"] = vars_\n\n    with pytest.raises(ResolveError) as exc_info:  # noqa: PT012\n        resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n        resolver.resolve()\n    assert \"partially\" in str(exc_info.value)\n\n\n@pytest.mark.parametrize(\n    \"vars_, loc\", [(DATA, \"build.vars[0]\"), (\"params.json\", \"params.json\")]\n)\ndef test_local_overwrite_error(tmp_dir, dvc, vars_, loc):\n    (tmp_dir / DEFAULT_PARAMS_FILE).dump(DATA)\n   
 (tmp_dir / \"params.json\").dump(DATA)\n\n    d = {\"stages\": {\"build\": {\"cmd\": \"echo ${models.foo}\", \"vars\": [vars_]}}}\n\n    resolver = DataResolver(dvc, tmp_dir.fs_path, d)\n    with pytest.raises(ResolveError) as exc_info:\n        resolver.resolve()\n\n    assert (\n        str(exc_info.value) == \"failed to parse stage 'build' in 'dvc.yaml':\\n\"\n        f\"cannot redefine 'models.bar' from '{loc}' \"\n        \"as it already exists in 'params.yaml'\"\n    )\n"
  },
  {
    "path": "tests/func/parsing/test_top_level.py",
    "content": "from dvc.parsing import DataResolver\n\n\ndef test_params(tmp_dir, dvc):\n    (tmp_dir / \"params.yaml\").dump(\n        {\"params\": {\"param1\": \"params.json\", \"param2\": \"params.toml\"}}\n    )\n\n    template = {\"params\": [\"${params.param1}\", \"param11\", \"${params.param2}\"]}\n    resolver = DataResolver(dvc, tmp_dir, template)\n    assert resolver.resolve_params() == [\"params.json\", \"param11\", \"params.toml\"]\n\n\ndef test_metrics(tmp_dir, dvc):\n    (tmp_dir / \"params.yaml\").dump(\n        {\"metrics\": {\"metric1\": \"metrics.json\", \"metric2\": \"metrics.toml\"}}\n    )\n\n    template = {\"metrics\": [\"${metrics.metric1}\", \"metric11\", \"${metrics.metric2}\"]}\n    resolver = DataResolver(dvc, tmp_dir, template)\n    assert resolver.resolve_metrics() == [\"metrics.json\", \"metric11\", \"metrics.toml\"]\n\n\ndef test_plots(tmp_dir, dvc):\n    template = {\n        \"plots\": [\n            {\n                \"${plots.plot1_name}\": {\n                    \"x\": \"${plots.x_cls}\",\n                    \"y\": {\n                        \"train_classes.csv\": \"${plots.y_train_cls}\",\n                        \"test_classes.csv\": [\n                            \"${plots.y_train_cls}\",\n                            \"${plots.y_test_cls}\",\n                        ],\n                    },\n                    \"title\": \"Compare test vs train confusion matrix\",\n                    \"template\": \"confusion\",\n                    \"x_label\": \"Actual class\",\n                    \"y_label\": \"Predicted class\",\n                }\n            },\n            {\"eval/importance2.png\": None},\n            {\"${plots.plot3_name}\": None},\n            \"eval/importance4.png\",\n            \"${plots.plot5_name}\",\n        ],\n    }\n\n    (tmp_dir / \"params.yaml\").dump(\n        {\n            \"plots\": {\n                \"x_cls\": \"actual_class\",\n                \"y_train_cls\": \"predicted_class\",\n      
          \"y_test_cls\": \"predicted_class2\",\n                \"plot1_name\": \"eval/importance1.png\",\n                \"plot3_name\": \"eval/importance3.png\",\n                \"plot5_name\": \"eval/importance5.png\",\n            }\n        }\n    )\n    resolver = DataResolver(dvc, tmp_dir, template)\n    assert resolver.resolve_plots() == [\n        {\n            \"eval/importance1.png\": {\n                \"x\": \"actual_class\",\n                \"y\": {\n                    \"train_classes.csv\": \"predicted_class\",\n                    \"test_classes.csv\": [\"predicted_class\", \"predicted_class2\"],\n                },\n                \"title\": \"Compare test vs train confusion matrix\",\n                \"template\": \"confusion\",\n                \"x_label\": \"Actual class\",\n                \"y_label\": \"Predicted class\",\n            }\n        },\n        {\"eval/importance2.png\": None},\n        {\"eval/importance3.png\": None},\n        \"eval/importance4.png\",\n        \"eval/importance5.png\",\n    ]\n\n\ndef test_artifacts(tmp_dir, dvc):\n    template = {\n        \"artifacts\": {\n            \"${artifacts.name}\": {\n                \"path\": \"${artifacts.path}\",\n                \"type\": \"model\",\n                \"desc\": \"CV classification model, ResNet50\",\n                \"labels\": [\"${artifacts.label1}\", \"${artifacts.label2}\"],\n                \"meta\": {\"framework\": \"${artifacts.framework}\"},\n            }\n        }\n    }\n\n    (tmp_dir / \"params.yaml\").dump(\n        {\n            \"artifacts\": {\n                \"name\": \"cv-classification\",\n                \"path\": \"models/resnet.pt\",\n                \"label1\": \"resnet50\",\n                \"label2\": \"classification\",\n                \"framework\": \"pytorch\",\n            }\n        }\n    )\n\n    resolver = DataResolver(dvc, tmp_dir, template)\n    assert resolver.resolve_artifacts() == {\n        \"cv-classification\": 
{\n            \"path\": \"models/resnet.pt\",\n            \"type\": \"model\",\n            \"desc\": \"CV classification model, ResNet50\",\n            \"labels\": [\"resnet50\", \"classification\"],\n            \"meta\": {\"framework\": \"pytorch\"},\n        }\n    }\n\n\ndef test_datasets(tmp_dir, dvc):\n    template = {\n        \"datasets\": [\n            {\"name\": \"${ds1.name}\", \"url\": \"${ds1.url}\", \"type\": \"dc\"},\n            {\n                \"name\": \"${ds2.name}\",\n                \"url\": \"${ds2.url}\",\n                \"type\": \"dvc\",\n                \"path\": \"${ds2.path}\",\n            },\n            {\n                \"name\": \"${ds3.name}\",\n                \"url\": \"${ds3.url}\",\n                \"type\": \"url\",\n            },\n        ]\n    }\n\n    (tmp_dir / \"params.yaml\").dump(\n        {\n            \"ds1\": {\"name\": \"dogs\", \"url\": \"dc://dogs\"},\n            \"ds2\": {\n                \"name\": \"example-get-started\",\n                \"url\": \"git@github.com:iterative/example-get-started.git\",\n                \"path\": \"path\",\n            },\n            \"ds3\": {\n                \"name\": \"cloud-versioning-demo\",\n                \"url\": \"s3://cloud-versioning-demo\",\n            },\n        }\n    )\n\n    resolver = DataResolver(dvc, tmp_dir, template)\n    assert resolver.resolve_datasets() == [\n        {\"name\": \"dogs\", \"url\": \"dc://dogs\", \"type\": \"dc\"},\n        {\n            \"name\": \"example-get-started\",\n            \"url\": \"git@github.com:iterative/example-get-started.git\",\n            \"type\": \"dvc\",\n            \"path\": \"path\",\n        },\n        {\n            \"name\": \"cloud-versioning-demo\",\n            \"url\": \"s3://cloud-versioning-demo\",\n            \"type\": \"url\",\n        },\n    ]\n"
  },
  {
    "path": "tests/func/plots/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/plots/test_collect.py",
    "content": "import dpath\n\nfrom dvc.repo.plots import Plots\n\n\ndef test_subdir_config_not_overwritten_by_parents(tmp_dir, scm, dvc):\n    plot_data = [\n        {\"x\": 1, \"y\": 0.1},\n        {\"x\": 2, \"y\": 0.2},\n        {\"x\": 3, \"y\": 0.3},\n    ]\n    subdir_plot_data = [\n        {\"x\": 1, \"y\": 0.2, \"z\": 0.1},\n        {\"x\": 2, \"y\": 0.3, \"z\": 0.2},\n        {\"x\": 3, \"y\": 0.4, \"z\": 0.3},\n    ]\n\n    (tmp_dir / \"plots\").mkdir()\n    (tmp_dir / \"plots\" / \"subdir\").mkdir()\n\n    (tmp_dir / \"plots\" / \"plot.json\").dump_json(plot_data)\n    (tmp_dir / \"plots\" / \"subdir\" / \"plot.json\").dump_json(subdir_plot_data)\n\n    plots_config = [\n        {\n            \"plots/subdir/\": {\n                \"x\": \"z\",\n                \"y\": \"x\",\n            }\n        },\n        {\"plots\": {\"x\": \"x\", \"y\": \"y\"}},\n        {\n            \"subdir axis defined by filename\": {\n                \"x\": {\"plots/subdir/plot.json\": \"x\"},\n                \"y\": {\"plots/subdir/plot.json\": \"y\"},\n            }\n        },\n    ]\n\n    from dvc.utils.serialize import modify_yaml\n\n    with modify_yaml(\"dvc.yaml\") as dvcfile_content:\n        dvcfile_content[\"plots\"] = plots_config\n\n    scm.add(\n        [\n            \"plots/plot.json\",\n            \"plots/subdir/plot.json\",\n            \"dvc.yaml\",\n        ]\n    )\n    scm.commit(\"add data sources\")\n\n    plots = next(Plots(dvc).collect())\n\n    assert dpath.get(plots, \"workspace/definitions/data/dvc.yaml/data\") == {\n        \"plots/plot.json\": {\"x\": \"x\", \"y\": \"y\"},\n        \"plots/subdir/plot.json\": {\"x\": \"z\", \"y\": \"x\"},\n        \"subdir axis defined by filename\": {\n            \"x\": {\"plots/subdir/plot.json\": \"x\"},\n            \"y\": {\"plots/subdir/plot.json\": \"y\"},\n        },\n    }\n"
  },
  {
    "path": "tests/func/plots/test_diff.py",
    "content": "import pytest\n\nfrom tests.utils.plots import get_plot\n\n\ndef test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics):\n    (tmp_dir / \"metric_t.json\").dump([{\"y\": 2}, {\"y\": 3}], sort_keys=True)\n    run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        plots=[\"metric.json\"],\n        name=\"train\",\n        commit=\"init\",\n    )\n\n    metric_head = [{\"y\": 3}, {\"y\": 5}]\n    (tmp_dir / \"metric_t.json\").dump_json(metric_head, sort_keys=True)\n    dvc.reproduce()\n    scm.add([\"dvc.lock\"])\n    scm.commit(\"second\")\n\n    metric_1 = [{\"y\": 5}, {\"y\": 6}]\n    (tmp_dir / \"metric_t.json\").dump_json(metric_1, sort_keys=True)\n    dvc.reproduce()\n\n    props = {\"fields\": [\"y\"]}\n    diff_result = dvc.plots.diff(props=props)\n\n    assert get_plot(diff_result, \"workspace\", file=\"metric.json\") == metric_1\n    assert get_plot(\n        diff_result, \"workspace\", \"definitions\", file=\"\", endkey=\"data\"\n    ) == {\"metric.json\": props}\n    assert get_plot(diff_result, \"HEAD\", file=\"metric.json\") == metric_head\n    assert get_plot(diff_result, \"HEAD\", \"definitions\", file=\"\", endkey=\"data\") == {\n        \"metric.json\": props\n    }\n\n    metric_2 = [{\"y\": 7}, {\"y\": 8}]\n    (tmp_dir / \"metric.json\").dump_json(metric_2, sort_keys=True)\n\n    diff_result = dvc.plots.diff(props=props)\n    assert get_plot(diff_result, \"workspace\", file=\"metric.json\") == metric_2\n    assert get_plot(\n        diff_result, \"workspace\", \"definitions\", file=\"\", endkey=\"data\"\n    ) == {\"metric.json\": props}\n\n    assert get_plot(diff_result, \"HEAD\", file=\"metric.json\") == metric_head\n    assert get_plot(\n        diff_result, \"workspace\", \"definitions\", file=\"\", endkey=\"data\"\n    ) == {\"metric.json\": props}\n\n\n@pytest.mark.vscode\ndef test_no_commits(tmp_dir):\n    from dvc.repo import Repo\n    from dvc.scm import Git\n\n    git = Git.init(tmp_dir.fs_path)\n 
   assert git.no_commits\n\n    assert Repo.init().plots.diff() == {}\n"
  },
  {
    "path": "tests/func/plots/test_modify.py",
    "content": "import pytest\n\nfrom dvc.dvcfile import LOCK_FILE\nfrom dvc.repo.plots import PropsNotFoundError\nfrom dvc.utils import relpath\nfrom tests.utils.plots import get_plot\n\n\ndef test_plots_modify_existing_template(\n    tmp_dir, dvc, run_copy_metrics, custom_template\n):\n    metric = [{\"a\": 1, \"b\": 2}, {\"a\": 2, \"b\": 3}]\n    (tmp_dir / \"metric_t.json\").dump_json(metric, sort_keys=True)\n    stage = run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        plots_no_cache=[\"metric.json\"],\n        name=\"copy-metrics\",\n        single_stage=False,\n    )\n    dvc.plots.modify(\"metric.json\", props={\"template\": relpath(custom_template)})\n    stage = stage.reload()\n    assert stage.outs[0].plot == {\"template\": relpath(custom_template)}\n\n\ndef test_plots_modify_should_not_change_lockfile(\n    tmp_dir, dvc, run_copy_metrics, custom_template\n):\n    (tmp_dir / \"metric_t.json\").dump_json([{\"a\": 1, \"b\": 2}], sort_keys=True)\n    run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        plots_no_cache=[\"metric.json\"],\n        name=\"copy-metrics\",\n        single_stage=False,\n    )\n\n    (tmp_dir / LOCK_FILE).unlink()\n    dvc.plots.modify(\"metric.json\", props={\"template\": relpath(custom_template)})\n    assert not (tmp_dir / LOCK_FILE).exists()\n\n\ndef test_plots_modify_not_existing_template(dvc):\n    from dvc_render.vega_templates import TemplateNotFoundError\n\n    with pytest.raises(TemplateNotFoundError):\n        dvc.plots.modify(\n            \"metric.json\", props={\"template\": \"not-existing-template.json\"}\n        )\n\n\ndef test_unset_nonexistent(tmp_dir, dvc, run_copy_metrics, custom_template):\n    metric = [{\"a\": 1, \"b\": 2}, {\"a\": 2, \"b\": 3}]\n    (tmp_dir / \"metric_t.json\").dump_json(metric, sort_keys=True)\n    run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        plots_no_cache=[\"metric.json\"],\n        
name=\"copy-metrics\",\n        single_stage=False,\n    )\n\n    with pytest.raises(PropsNotFoundError):\n        dvc.plots.modify(\"metric.json\", unset=[\"nonexistent\"])\n\n\ndef test_dir_plots(tmp_dir, dvc, run_copy_metrics):\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n\n    fname = \"file.json\"\n    (tmp_dir / fname).dump_json(metric, sort_keys=True)\n\n    p1 = \"subdir/p1.json\"\n    p2 = \"subdir/p2.json\"\n    tmp_dir.dvc.run(\n        cmd=(\n            f\"mkdir subdir && python copy.py {fname} {p1} && \"\n            f\"python copy.py {fname} {p2}\"\n        ),\n        deps=[fname],\n        single_stage=False,\n        plots=[\"subdir\"],\n        name=\"copy_double\",\n    )\n    dvc.plots.modify(\"subdir\", {\"title\": \"TITLE\"})\n\n    result = dvc.plots.show()\n    assert get_plot(result, \"workspace\", typ=\"definitions\", file=\"\") == {\n        p1: {\"title\": \"TITLE\"},\n        p2: {\"title\": \"TITLE\"},\n    }\n"
  },
  {
    "path": "tests/func/plots/test_show.py",
    "content": "import json\nimport os\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.dvcfile import PROJECT_FILE\nfrom dvc.exceptions import OverlappingOutputPathsError\nfrom dvc.repo import Repo\nfrom dvc.repo.plots import PlotMetricTypeError, onerror_collect\nfrom dvc.utils.fs import remove\nfrom dvc.utils.serialize import EncodingError, YAMLFileCorruptedError, modify_yaml\nfrom tests.utils.plots import get_plot\n\n\ndef test_show_targets(tmp_dir, dvc):\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n    (tmp_dir / \"metric.json\").dump_json(metric, sort_keys=True)\n\n    plots = dvc.plots.show(targets=[\"metric.json\"])\n    assert get_plot(plots, \"workspace\", file=\"metric.json\") == metric\n\n    plots = dvc.plots.show(targets=(tmp_dir / \"metric.json\").fs_path)\n    assert get_plot(plots, \"workspace\", file=\"metric.json\") == metric\n\n\ndef test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics):\n    metric1 = [{\"y\": 2}, {\"y\": 3}]\n    (tmp_dir / \"metric_t.json\").dump_json(metric1, sort_keys=True)\n    run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        plots=[\"metric.json\"],\n        name=\"copy-metric\",\n        commit=\"there is metric\",\n    )\n    scm.tag(\"v1\")\n\n    # Make a different plot and then remove its datafile\n    metric2 = [{\"y\": 3}, {\"y\": 4}]\n    (tmp_dir / \"metric_t.json\").dump_json(metric2, sort_keys=True)\n    stage = run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        plots=[\"metric.json\"],\n        name=\"copy-metric\",\n        commit=\"there is an another metric\",\n    )\n    scm.tag(\"v2\")\n    remove(stage.outs[0].fspath)\n    remove(stage.outs[0].cache_path)\n\n    plots_data = dvc.plots.show(revs=[\"v1\", \"v2\"], targets=[\"metric.json\"])\n\n    assert get_plot(plots_data, \"v1\", file=\"metric.json\") == metric1\n    assert isinstance(\n        get_plot(plots_data, \"v2\", 
file=\"metric.json\", endkey=\"error\"),\n        FileNotFoundError,\n    )\n\n\ndef test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics):\n    tmp_dir.gen(\"metric_t.txt\", \"some text\")\n    run_copy_metrics(\n        \"metric_t.txt\",\n        \"metric.txt\",\n        plots_no_cache=[\"metric.txt\"],\n        name=\"copy-metric\",\n        commit=\"add text metric\",\n    )\n\n    result = dvc.plots.show(targets=[\"metric.txt\"], onerror=onerror_collect)\n    assert isinstance(\n        get_plot(result, \"workspace\", file=\"metric.txt\", endkey=\"error\"),\n        PlotMetricTypeError,\n    )\n\n\n@pytest.mark.parametrize(\"use_dvc\", [True, False])\ndef test_show_non_plot(tmp_dir, scm, use_dvc):\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n    (tmp_dir / \"metric.json\").dump_json(metric, sort_keys=True)\n\n    if use_dvc:\n        dvc = Repo.init()\n    else:\n        dvc = Repo(uninitialized=True)\n\n    plots = dvc.plots.show(targets=[\"metric.json\"])\n\n    assert get_plot(plots, \"workspace\", file=\"metric.json\") == metric\n\n\ndef test_show_non_plot_and_plot_with_params(tmp_dir, scm, dvc, run_copy_metrics):\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n    (tmp_dir / \"metric.json\").dump_json(metric, sort_keys=True)\n    run_copy_metrics(\n        \"metric.json\",\n        \"metric2.json\",\n        plots_no_cache=[\"metric2.json\"],\n        name=\"train\",\n    )\n    props = {\"title\": \"TITLE\"}\n    dvc.plots.modify(\"metric2.json\", props=props)\n\n    result = dvc.plots.show(targets=[\"metric.json\", \"metric2.json\"])\n\n    assert get_plot(result, \"workspace\", file=\"metric.json\") == metric\n    assert get_plot(result, \"workspace\", file=\"metric2.json\") == metric\n    assert get_plot(result, \"workspace\", file=\"metric2.json\", endkey=\"props\") == props\n\n\ndef test_show_from_subdir(tmp_dir, dvc, capsys):\n    subdir = tmp_dir / \"subdir\"\n\n 
   subdir.mkdir()\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n    (subdir / \"metric.json\").dump_json(metric, sort_keys=True)\n\n    with subdir.chdir():\n        assert main([\"plots\", \"show\", \"metric.json\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert subdir.as_uri() in out\n    assert (subdir / \"dvc_plots\").is_dir()\n    assert (subdir / \"dvc_plots\" / \"index.html\").is_file()\n\n\ndef test_plots_show_non_existing(tmp_dir, dvc, capsys):\n    result = dvc.plots.show(targets=[\"plot.json\"])\n    assert isinstance(\n        get_plot(result, \"workspace\", file=\"plot.json\", endkey=\"error\"),\n        FileNotFoundError,\n    )\n\n    cap = capsys.readouterr()\n    assert (\n        \"DVC failed to load some plots for following revisions: 'workspace'\" in cap.err\n    )\n\n\n@pytest.mark.parametrize(\"clear_before_run\", [True, False])\ndef test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run):\n    data_dir = tmp_dir / \"data\"\n    data_dir.mkdir()\n\n    (data_dir / \"m1_temp.yaml\").dump({\"a\": {\"b\": {\"c\": 2, \"d\": 1}}})\n    run_copy_metrics(\n        str(data_dir / \"m1_temp.yaml\"),\n        str(data_dir / \"m1.yaml\"),\n        single_stage=False,\n        commit=\"add m1\",\n        name=\"cp-m1\",\n        plots=[str(data_dir / \"m1.yaml\")],\n    )\n    with (tmp_dir / \"dvc.yaml\").modify() as d:\n        # trying to make an output overlaps error\n        d[\"stages\"][\"corrupted-stage\"] = {\"cmd\": \"mkdir data\", \"outs\": [\"data\"]}\n\n    # running by clearing and not clearing stuffs\n    # so as it works even for optimized cases\n    if clear_before_run:\n        remove(data_dir)\n        remove(dvc.cache.local.path)\n\n    dvc._reset()\n\n    result = dvc.plots.show(onerror=onerror_collect)\n    assert isinstance(\n        get_plot(result, \"workspace\", endkey=\"error\"),\n        OverlappingOutputPathsError,\n    )\n\n\ndef 
test_plots_show_nested_x_dict(tmp_dir, dvc, scm):\n    rel_pipeline_dir = \"pipelines/data-increment\"\n\n    pipeline_rel_dvclive_metrics_dir = \"dvclive/plots/metrics\"\n    dvc_rel_dvclive_metrics_dir = (\n        f\"{rel_pipeline_dir}/{pipeline_rel_dvclive_metrics_dir}\"\n    )\n\n    pipeline_dir = tmp_dir / rel_pipeline_dir\n    dvclive_metrics_dir = pipeline_dir / pipeline_rel_dvclive_metrics_dir\n    dvclive_metrics_dir.mkdir(parents=True)\n\n    def _get_plot_defn(rel_dir: str) -> dict:\n        return {\n            \"template\": \"simple\",\n            \"x\": {f\"{rel_dir}/Max_Leaf_Nodes.tsv\": \"Max_Leaf_Nodes\"},\n            \"y\": {f\"{rel_dir}/Error.tsv\": \"Error\"},\n        }\n\n    (pipeline_dir / \"dvc.yaml\").dump(\n        {\n            \"plots\": [\n                {\n                    \"Error vs max_leaf_nodes\": _get_plot_defn(\n                        pipeline_rel_dvclive_metrics_dir\n                    )\n                },\n            ]\n        },\n    )\n\n    dvclive_metrics_dir.gen(\n        {\n            \"Error.tsv\": \"step\\tError\\n0\\t0.11\\n1\\t0.22\\n2\\t0.44\\n\",\n            \"Max_Leaf_Nodes.tsv\": \"step\\tMax_Leaf_Nodes\\n0\\t5\\n1\\t50\\n2\\t500\\n\",\n        }\n    )\n\n    scm.commit(\"add dvc.yaml and dvclive metrics\")\n\n    result = dvc.plots.show()\n    assert result == {\n        \"workspace\": {\n            \"definitions\": {\n                \"data\": {\n                    f\"{rel_pipeline_dir}/dvc.yaml\": {\n                        \"data\": {\n                            \"Error vs max_leaf_nodes\": _get_plot_defn(\n                                dvc_rel_dvclive_metrics_dir\n                            )\n                        },\n                    }\n                }\n            },\n            \"sources\": {\n                \"data\": {\n                    f\"{dvc_rel_dvclive_metrics_dir}/Error.tsv\": {\n                        \"data\": [\n                            {\"Error\": 
\"0.11\", \"step\": \"0\"},\n                            {\"Error\": \"0.22\", \"step\": \"1\"},\n                            {\"Error\": \"0.44\", \"step\": \"2\"},\n                        ],\n                        \"props\": {},\n                    },\n                    f\"{dvc_rel_dvclive_metrics_dir}/Max_Leaf_Nodes.tsv\": {\n                        \"data\": [\n                            {\"Max_Leaf_Nodes\": \"5\", \"step\": \"0\"},\n                            {\"Max_Leaf_Nodes\": \"50\", \"step\": \"1\"},\n                            {\"Max_Leaf_Nodes\": \"500\", \"step\": \"2\"},\n                        ],\n                        \"props\": {},\n                    },\n                }\n            },\n        }\n    }\n\n\ndef test_dir_plots(tmp_dir, dvc, run_copy_metrics):\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n\n    fname = \"file.json\"\n    (tmp_dir / fname).dump_json(metric, sort_keys=True)\n\n    p1 = \"subdir/p1.json\"\n    p2 = \"subdir/p2.json\"\n    tmp_dir.dvc.run(\n        cmd=(\n            f\"mkdir subdir && python copy.py {fname} {p1} && \"\n            f\"python copy.py {fname} {p2}\"\n        ),\n        deps=[fname],\n        single_stage=False,\n        plots=[\"subdir\"],\n        name=\"copy_double\",\n    )\n    props = {\"title\": \"TITLE\"}\n    dvc.plots.modify(\"subdir\", props)\n\n    result = dvc.plots.show()\n\n    assert set(get_plot(result, \"workspace\")) == {p1, p2}\n    assert get_plot(result, \"workspace\", typ=\"definitions\", file=\"\") == {\n        p1: props,\n        p2: props,\n    }\n\n\ndef test_ignore_parsing_error(tmp_dir, dvc, run_copy_metrics):\n    with open(\"file\", \"wb\", encoding=None) as fobj:\n        fobj.write(b\"\\xc1\")\n\n    run_copy_metrics(\n        \"file\", \"plot_file.json\", plots=[\"plot_file.json\"], name=\"copy-metric\"\n    )\n    result = 
dvc.plots.show(onerror=onerror_collect)\n\n    assert isinstance(\n        get_plot(result, \"workspace\", file=\"plot_file.json\", endkey=\"error\"),\n        EncodingError,\n    )\n\n\n@pytest.mark.parametrize(\n    \"file,path_kwargs\",\n    [\n        (PROJECT_FILE, {\"revision\": \"workspace\", \"endkey\": \"error\"}),\n        (\n            \"plot.yaml\",\n            {\"revision\": \"workspace\", \"file\": \"plot.yaml\", \"endkey\": \"error\"},\n        ),\n    ],\n)\ndef test_log_errors(tmp_dir, scm, dvc, run_copy_metrics, file, path_kwargs, capsys):\n    metric = [{\"val\": 2}, {\"val\": 3}]\n    (tmp_dir / \"metric_t.yaml\").dump(metric)\n    run_copy_metrics(\n        \"metric_t.yaml\",\n        \"plot.yaml\",\n        plots=[\"plot.yaml\"],\n        single_stage=False,\n        name=\"train\",\n    )\n    scm.tag(\"v1\")\n\n    with open(file, \"a\", encoding=\"utf-8\") as fd:\n        fd.write(\"\\nMALFORMED!\")\n\n    result = dvc.plots.show(onerror=onerror_collect)\n    _, error = capsys.readouterr()\n\n    assert isinstance(get_plot(result, **path_kwargs), YAMLFileCorruptedError)\n    assert (\n        \"DVC failed to load some plots for following revisions: 'workspace'.\" in error\n    )\n\n\n@pytest.mark.parametrize(\"ext\", [\"jpg\", \"svg\"])\ndef test_plots_binary(tmp_dir, scm, dvc, run_copy_metrics, custom_template, ext):\n    file1 = f\"image.{ext}\"\n    file2 = f\"plot.{ext}\"\n    with open(file1, \"wb\") as fd:\n        fd.write(b\"content\")\n\n    dvc.add([file1])\n    run_copy_metrics(\n        file1,\n        file2,\n        commit=\"run training\",\n        plots=[file2],\n        name=\"s2\",\n        single_stage=False,\n    )\n\n    scm.add([\"dvc.yaml\", \"dvc.lock\"])\n    scm.commit(\"initial\")\n\n    scm.tag(\"v1\")\n\n    with open(file2, \"wb\") as fd:\n        fd.write(b\"content2\")\n\n    result = dvc.plots.show(revs=[\"v1\", \"workspace\"])\n    assert get_plot(result, \"v1\", file=file2) == b\"content\"\n    assert 
get_plot(result, \"workspace\", file=file2) == b\"content2\"\n\n\ndef test_collect_non_existing_dir(tmp_dir, dvc, run_copy_metrics):\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n    subdir_metric = [{\"y\": 101, \"x\": 3}, {\"y\": 202, \"x\": 4}]\n\n    pname = \"source.json\"\n    (tmp_dir / pname).dump_json(metric, sort_keys=True)\n\n    sname = \"subdir_source.json\"\n    (tmp_dir / sname).dump_json(subdir_metric, sort_keys=True)\n\n    p1 = os.path.join(\"subdir\", \"p1.json\")\n    p2 = os.path.join(\"subdir\", \"p2.json\")\n    subdir_stage = tmp_dir.dvc.run(\n        cmd=(\n            f\"mkdir subdir && python copy.py {sname} {p1} && \"\n            f\"python copy.py {sname} {p2}\"\n        ),\n        deps=[sname],\n        plots=[\"subdir\"],\n        name=\"copy_double\",\n    )\n\n    run_copy_metrics(\n        pname,\n        \"plot.json\",\n        plots=[\"plot.json\"],\n        name=\"copy-metric\",\n        commit=\"there is metric\",\n    )\n\n    remove(subdir_stage.outs[0].cache_path)\n    remove(subdir_stage.outs[0].fs_path)\n\n    result = dvc.plots.show()\n    assert get_plot(result, \"workspace\", typ=\"definitions\", file=\"\", endkey=\"error\")\n    # make sure others gets loaded\n    assert get_plot(result, \"workspace\", file=\"plot.json\") == metric\n\n\n@pytest.mark.parametrize(\n    \"plot_config,expected_datafiles\",\n    [\n        (\n            {\n                \"comparison\": {\n                    \"x\": {\"data1.json\": \"a\"},\n                    \"y\": {\"sub/dir/data2.json\": \"b\"},\n                }\n            },\n            [\"data1.json\", os.path.join(\"sub\", \"dir\", \"data2.json\")],\n        ),\n        (\n            {\"data1.json\": {\"x\": \"c\", \"y\": \"a\", \"title\": \"File as key test\"}},\n            [\"data1.json\"],\n        ),\n        (\n            {\n                \"infer_data_from_y\": {\n        
            \"x\": \"a\",\n                    \"y\": {\"data1.json\": \"b\", \"sub/dir/data2.json\": \"c\"},\n                }\n            },\n            [\"data1.json\", os.path.join(\"sub\", \"dir\", \"data2.json\")],\n        ),\n    ],\n)\ndef test_top_level_plots(tmp_dir, dvc, plot_config, expected_datafiles):\n    data = {\n        \"data1.json\": [\n            {\"a\": 1, \"b\": 0.1, \"c\": 0.01},\n            {\"a\": 2, \"b\": 0.2, \"c\": 0.02},\n        ],\n        os.path.join(\"sub\", \"dir\", \"data.json\"): [\n            {\"a\": 6, \"b\": 0.6, \"c\": 0.06},\n            {\"a\": 7, \"b\": 0.7, \"c\": 0.07},\n        ],\n    }\n\n    for filename, content in data.items():\n        dirname = os.path.dirname(filename)\n        if dirname:\n            os.makedirs(dirname)\n        (tmp_dir / filename).dump_json(content, sort_keys=True)\n\n    config_file = \"dvc.yaml\"\n    with modify_yaml(config_file) as dvcfile_content:\n        dvcfile_content[\"plots\"] = [plot_config]\n\n    result = dvc.plots.show()\n\n    assert plot_config == get_plot(\n        result, \"workspace\", typ=\"definitions\", file=config_file\n    )\n\n    for filename, content in data.items():\n        if filename in expected_datafiles:\n            assert content == get_plot(result, \"workspace\", file=filename)\n        else:\n            assert filename not in get_plot(result, \"workspace\")\n\n\ndef test_show_plots_defined_with_native_os_path(tmp_dir, dvc, scm, capsys):\n    \"\"\"Regression test for #8689\"\"\"\n    top_level_plot = os.path.join(\"subdir\", \"top_level_plot.csv\")\n    stage_plot = os.path.join(\"subdir\", \"stage_plot.csv\")\n    (tmp_dir / \"subdir\").mkdir()\n    (tmp_dir / top_level_plot).write_text(\"foo,bar\\n1,2\")\n    (tmp_dir / stage_plot).write_text(\"foo,bar\\n1,2\")\n    (tmp_dir / \"dvc.yaml\").dump({\"plots\": [top_level_plot]})\n\n    dvc.stage.add(name=\"foo\", plots=[stage_plot], cmd=\"echo foo\")\n\n    plots = dvc.plots.show()\n\n    # 
sources are in posixpath format\n    sources = plots[\"workspace\"][\"sources\"][\"data\"]\n    assert sources[\"subdir/top_level_plot.csv\"][\"data\"] == [{\"foo\": \"1\", \"bar\": \"2\"}]\n    assert sources[\"subdir/stage_plot.csv\"][\"data\"] == [{\"foo\": \"1\", \"bar\": \"2\"}]\n    # definitions are in native os format\n    definitions = plots[\"workspace\"][\"definitions\"][\"data\"]\n    assert top_level_plot in definitions[\"dvc.yaml\"][\"data\"]\n    assert stage_plot in definitions[\"\"][\"data\"]\n\n    capsys.readouterr()\n    assert main([\"plots\", \"show\", \"--json\"]) == 0\n    out, _ = capsys.readouterr()\n    json_out = json.loads(out)\n    assert \"errors\" not in json_out\n\n    json_data = json_out[\"data\"]\n    assert json_data[f\"{top_level_plot}\"]\n    assert json_data[stage_plot]\n\n\n@pytest.mark.parametrize(\n    \"plot_config,expanded_config,expected_datafiles\",\n    [\n        (\n            {\n                \"comparison\": {\n                    \"x\": {\"${data1}\": \"${a}\"},\n                    \"y\": {\"sub/dir/data2.json\": \"${b}\"},\n                }\n            },\n            {\n                \"comparison\": {\n                    \"x\": {\"data1.json\": \"a\"},\n                    \"y\": {\"sub/dir/data2.json\": \"b\"},\n                }\n            },\n            [\"data1.json\", os.path.join(\"sub\", \"dir\", \"data2.json\")],\n        ),\n        (\n            {\"${data1}\": None},\n            {\"data1.json\": {}},\n            [\"data1.json\"],\n        ),\n        (\n            \"${data1}\",\n            {\"data1.json\": {}},\n            [\"data1.json\"],\n        ),\n    ],\n)\ndef test_top_level_parametrized(\n    tmp_dir, dvc, plot_config, expanded_config, expected_datafiles\n):\n    (tmp_dir / \"params.yaml\").dump(\n        {\"data1\": \"data1.json\", \"a\": \"a\", \"b\": \"b\", \"c\": \"c\"}\n    )\n    data = {\n        \"data1.json\": [\n            {\"a\": 1, \"b\": 0.1, \"c\": 0.01},\n      
      {\"a\": 2, \"b\": 0.2, \"c\": 0.02},\n        ],\n        os.path.join(\"sub\", \"dir\", \"data.json\"): [\n            {\"a\": 6, \"b\": 0.6, \"c\": 0.06},\n            {\"a\": 7, \"b\": 0.7, \"c\": 0.07},\n        ],\n    }\n\n    for filename, content in data.items():\n        dirname = os.path.dirname(filename)\n        if dirname:\n            os.makedirs(dirname)\n        (tmp_dir / filename).dump_json(content, sort_keys=True)\n\n    config_file = \"dvc.yaml\"\n    with modify_yaml(config_file) as dvcfile_content:\n        dvcfile_content[\"plots\"] = [plot_config]\n\n    result = dvc.plots.show()\n\n    assert expanded_config == get_plot(\n        result, \"workspace\", typ=\"definitions\", file=config_file\n    )\n\n    for filename, content in data.items():\n        if filename in expected_datafiles:\n            assert content == get_plot(result, \"workspace\", file=filename)\n        else:\n            assert filename not in get_plot(result, \"workspace\")\n"
  },
  {
    "path": "tests/func/repro/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/repro/test_repro.py",
    "content": "import filecmp\nimport os\nimport shutil\nfrom copy import deepcopy\nfrom textwrap import dedent\n\nimport pytest\nfrom funcy import lsplit\n\nfrom dvc.cli import main\nfrom dvc.dvcfile import LOCK_FILE, PROJECT_FILE\nfrom dvc.exceptions import CyclicGraphError, ReproductionError\nfrom dvc.fs import system\nfrom dvc.output import Output\nfrom dvc.stage import PipelineStage, Stage\nfrom dvc.stage.cache import RunCacheNotSupported\nfrom dvc.stage.exceptions import StageFileDoesNotExistError, StageNotFound\nfrom dvc.testing import matchers as M\nfrom dvc.utils.fs import remove\nfrom dvc.utils.serialize import modify_yaml\nfrom dvc_data.hashfile.hash import file_md5\n\n\ndef test_non_existing_stage_name(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"file1\", \"file1\")\n    run_copy(\"file1\", \"file2\", name=\"copy-file1-file2\")\n\n    with pytest.raises(StageNotFound):\n        dvc.freeze(\":copy-file1-file3\")\n\n    assert main([\"freeze\", \":copy-file1-file3\"]) != 0\n\n\ndef test_repro_fail(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    os.unlink(\"copy.py\")\n    assert main([\"repro\", stage.addressing]) != 0\n\n\ndef test_repro_frozen(tmp_dir, dvc, run_copy):\n    (data_stage,) = tmp_dir.dvc_gen(\"data\", \"foo\")\n    stage0 = run_copy(\"data\", \"stage0\", name=\"copy-data-stage0\")\n    run_copy(\"stage0\", \"stage1\", name=\"copy-data-stage1\")\n    run_copy(\"stage1\", \"stage2\", name=\"copy-data-stage2\")\n\n    dvc.freeze(\"copy-data-stage1\")\n\n    tmp_dir.gen(\"data\", \"bar\")\n    stages = dvc.reproduce()\n    assert stages == [data_stage, stage0]\n\n\ndef test_downstream(tmp_dir, dvc):\n    # The dependency graph should look like this:\n    #\n    #       E\n    #      / \\\n    #     D   F\n    #    / \\   \\\n    #   B   C   G\n    #    \\ /\n    
#     A\n    #\n    assert main([\"stage\", \"add\", \"--run\", \"-n\", \"A-gen\", \"-o\", \"A\", \"echo A>A\"]) == 0\n    assert (\n        main([\"stage\", \"add\", \"--run\", \"-n\", \"B-gen\", \"-d\", \"A\", \"-o\", \"B\", \"echo B>B\"])\n        == 0\n    )\n    assert (\n        main(\n            [\n                \"stage\",\n                \"add\",\n                \"--run\",\n                \"-n\",\n                \"C-gen\",\n                \"-d\",\n                \"A\",\n                \"-o\",\n                \"C\",\n                \"echo C>C\",\n            ]\n        )\n        == 0\n    )\n    assert (\n        main(\n            [\n                \"stage\",\n                \"add\",\n                \"--run\",\n                \"-n\",\n                \"D-gen\",\n                \"-d\",\n                \"B\",\n                \"-d\",\n                \"C\",\n                \"-o\",\n                \"D\",\n                \"echo D>D\",\n            ]\n        )\n        == 0\n    )\n    assert main([\"stage\", \"add\", \"--run\", \"-n\", \"G-gen\", \"-o\", \"G\", \"echo G>G\"]) == 0\n    assert (\n        main([\"stage\", \"add\", \"--run\", \"-n\", \"F-gen\", \"-d\", \"G\", \"-o\", \"F\", \"echo F>F\"])\n        == 0\n    )\n    assert (\n        main(\n            [\n                \"stage\",\n                \"add\",\n                \"--run\",\n                \"-n\",\n                \"E-gen\",\n                \"-d\",\n                \"D\",\n                \"-d\",\n                \"F\",\n                \"-o\",\n                \"E\",\n                \"echo E>E\",\n            ]\n        )\n        == 0\n    )\n\n    # We want the evaluation to move from B to E\n    #\n    #       E\n    #      /\n    #     D\n    #    /\n    #   B\n    #\n    evaluation = dvc.reproduce(PROJECT_FILE + \":B-gen\", downstream=True, force=True)\n\n    assert len(evaluation) == 3\n    assert all(isinstance(stage, PipelineStage) for stage in 
evaluation)\n    assert all(stage.relpath == PROJECT_FILE for stage in evaluation)\n    assert [stage.name for stage in evaluation] == [\"B-gen\", \"D-gen\", \"E-gen\"]\n\n    # B, C should be run (in any order) before D\n    # See https://github.com/treeverse/dvc/issues/3602\n    evaluation = dvc.reproduce(PROJECT_FILE + \":A-gen\", downstream=True, force=True)\n\n    assert len(evaluation) == 5\n    assert all(isinstance(stage, PipelineStage) for stage in evaluation)\n    assert all(stage.relpath == PROJECT_FILE for stage in evaluation)\n    assert [stage.name for stage in evaluation] == [\n        \"A-gen\",\n        M.any_of(\"B-gen\", \"C-gen\"),\n        M.any_of(\"B-gen\", \"C-gen\"),\n        \"D-gen\",\n        \"E-gen\",\n    ]\n\n\ndef test_repro_when_cmd_changes(tmp_dir, dvc, run_copy, mocker):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    assert not dvc.reproduce(stage.addressing)\n\n    from dvc.stage.run import cmd_run\n\n    m = mocker.patch(\"dvc.stage.run.cmd_run\", wraps=cmd_run)\n\n    with modify_yaml(\"dvc.yaml\") as d:\n        # change cmd spacing by two\n        d[\"stages\"][\"copy-foo-bar\"][\"cmd\"] = \"  \".join(stage.cmd.split())\n\n    assert dvc.status([stage.addressing]) == {stage.addressing: [\"changed command\"]}\n    assert dvc.reproduce(stage.addressing)[0] == stage\n    m.assert_called_once_with(stage, dry=False, run_env=None)\n\n\ndef test_repro_when_new_deps_is_added_in_dvcfile(tmp_dir, dvc, run_copy, copy_script):\n    from dvc.dvcfile import load_file\n\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    stage = dvc.run(\n        cmd=\"python copy.py {} {}\".format(\"foo\", \"foobar\"),\n        outs=[\"foobar\"],\n        deps=[\"foo\"],\n        name=\"copy-file\",\n    )\n    target = PROJECT_FILE + \":copy-file\"\n    assert not dvc.reproduce(target)\n\n    dvcfile = load_file(dvc, stage.path)\n    data, _ = dvcfile._load()\n    
data[\"stages\"][\"copy-file\"][\"deps\"] += [\"copy.py\"]\n    (tmp_dir / stage.path).dump(data)\n\n    assert dvc.reproduce(target)[0] == stage\n\n\ndef test_repro_when_new_outs_is_added_in_dvcfile(tmp_dir, dvc, copy_script):\n    from dvc.dvcfile import load_file\n\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    stage = dvc.run(\n        cmd=\"python copy.py {} {}\".format(\"foo\", \"foobar\"),\n        outs=[],  # scenario where user forgot to add\n        deps=[\"foo\"],\n        name=\"copy-file\",\n    )\n    target = \":copy-file\"\n    assert not dvc.reproduce(target)\n\n    dvcfile = load_file(dvc, stage.path)\n    data, _ = dvcfile._load()\n    data[\"stages\"][\"copy-file\"][\"outs\"] = [\"foobar\"]\n    (tmp_dir / stage.path).dump(data)\n\n    assert dvc.reproduce(target)[0] == stage\n\n\ndef test_repro_when_new_deps_is_moved(tmp_dir, dvc, copy_script):\n    from dvc.dvcfile import load_file\n\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"foo\"})\n    stage = dvc.run(\n        cmd=\"python copy.py {} {}\".format(\"foo\", \"foobar\"),\n        outs=[\"foobar\"],\n        deps=[\"foo\"],\n        name=\"copy-file\",\n    )\n    target = \":copy-file\"\n    assert not dvc.reproduce(target)\n\n    # hardcode values in source code, ignore sys.argv\n    tmp_dir.gen(\n        \"copy.py\",\n        \"\"\"\nimport shutil\n\nshutil.copyfile('bar', 'foobar')\n\"\"\",\n    )\n    from shutil import move\n\n    move(\"foo\", \"bar\")\n\n    dvcfile = load_file(dvc, stage.path)\n    data, _ = dvcfile._load()\n    data[\"stages\"][\"copy-file\"][\"deps\"] = [\"bar\"]\n    (tmp_dir / stage.path).dump(data)\n\n    assert dvc.reproduce(target)[0] == stage\n\n\ndef test_repro_when_new_out_overlaps_others_stage_outs(tmp_dir, dvc):\n    from dvc.exceptions import OverlappingOutputPathsError\n\n    tmp_dir.gen({\"dir\": {\"file1\": \"file1\"}, \"foo\": \"foo\"})\n    dvc.add(\"dir\")\n    (tmp_dir / PROJECT_FILE).dump(\n        {\n            \"stages\": {\n    
            \"run-copy\": {\n                    \"cmd\": \"python copy {} {}\".format(\"foo\", \"dir/foo\"),\n                    \"deps\": [\"foo\"],\n                    \"outs\": [\"dir/foo\"],\n                }\n            }\n        },\n    )\n    with pytest.raises(OverlappingOutputPathsError):\n        dvc.reproduce(\":run-copy\")\n\n\ndef test_repro_when_new_deps_added_does_not_exist(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"foo\", \"foo\")\n    (tmp_dir / PROJECT_FILE).dump(\n        {\n            \"stages\": {\n                \"run-copy\": {\n                    \"cmd\": \"python copy.py {} {}\".format(\"foo\", \"foobar\"),\n                    \"deps\": [\"foo\", \"bar\"],\n                    \"outs\": [\"foobar\"],\n                }\n            }\n        },\n    )\n    with pytest.raises(ReproductionError):\n        dvc.reproduce(\":run-copy\")\n\n\ndef test_repro_when_new_outs_added_does_not_exist(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"foo\", \"foo\")\n    (tmp_dir / PROJECT_FILE).dump(\n        {\n            \"stages\": {\n                \"run-copy\": {\n                    \"cmd\": \"python copy.py {} {}\".format(\"foo\", \"foobar\"),\n                    \"deps\": [\"foo\"],\n                    \"outs\": [\"foobar\", \"bar\"],\n                }\n            }\n        },\n    )\n    with pytest.raises(ReproductionError):\n        dvc.reproduce(\":run-copy\")\n\n\ndef test_repro_when_lockfile_gets_deleted(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"foo\", \"foo\")\n    (tmp_dir / PROJECT_FILE).dump(\n        {\n            \"stages\": {\n                \"run-copy\": {\n                    \"cmd\": \"python copy.py {} {}\".format(\"foo\", \"foobar\"),\n                    \"deps\": [\"foo\"],\n                    \"outs\": [\"foobar\"],\n                }\n            }\n        },\n    )\n    assert dvc.reproduce(\":run-copy\")\n    assert os.path.exists(LOCK_FILE)\n\n    assert not dvc.reproduce(\":run-copy\")\n    
os.unlink(LOCK_FILE)\n    stages = dvc.reproduce(\":run-copy\")\n    assert stages\n    assert stages[0].relpath == PROJECT_FILE\n    assert stages[0].name == \"run-copy\"\n\n\ndef test_cyclic_graph_error(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    run_copy(\"bar\", \"baz\", name=\"copy-bar-baz\")\n    run_copy(\"baz\", \"foobar\", name=\"copy-baz-foobar\")\n\n    with modify_yaml(\"dvc.yaml\") as data:\n        data[\"stages\"][\"copy-baz-foo\"] = {\n            \"cmd\": \"echo baz > foo\",\n            \"deps\": [\"baz\"],\n            \"outs\": [\"foo\"],\n        }\n\n    with pytest.raises(CyclicGraphError):\n        dvc.reproduce(\":copy-baz-foo\")\n\n\ndef test_repro_multiple_params(tmp_dir, dvc):\n    from dvc.stage.utils import split_params_deps\n    from tests.func.test_run import supported_params\n\n    (tmp_dir / \"params2.yaml\").dump(supported_params)\n    (tmp_dir / \"params.yaml\").dump(supported_params)\n\n    (tmp_dir / \"foo\").write_text(\"foo\")\n    stage = dvc.run(\n        name=\"read_params\",\n        deps=[\"foo\"],\n        outs=[\"bar\"],\n        params=[\"params2.yaml:lists,floats,name\", \"answer,floats,nested.nested1\"],\n        cmd=\"cat params2.yaml params.yaml > bar\",\n    )\n\n    params, deps = split_params_deps(stage)\n    assert len(params) == 2\n    assert len(deps) == 1\n    assert len(stage.outs) == 1\n\n    lockfile = stage.dvcfile._lockfile\n    assert lockfile.load()[\"stages\"][\"read_params\"][\"params\"] == {\n        \"params2.yaml\": {\n            \"lists\": [42, 42.0, \"42\"],\n            \"floats\": 42.0,\n            \"name\": \"Answer\",\n        },\n        \"params.yaml\": {\n            \"answer\": 42,\n            \"floats\": 42.0,\n            \"nested.nested1\": {\"nested2\": \"42\", \"nested2-2\": 41.99999},\n        },\n    }\n    data, _ = stage.dvcfile._load()\n    params = data[\"stages\"][\"read_params\"][\"params\"]\n\n 
   custom, defaults = lsplit(lambda v: isinstance(v, dict), params)\n    assert set(custom[0][\"params2.yaml\"]) == {\"name\", \"lists\", \"floats\"}\n    assert set(defaults) == {\"answer\", \"floats\", \"nested.nested1\"}\n\n    assert not dvc.reproduce(stage.addressing)\n    params = deepcopy(supported_params)\n    params[\"answer\"] = 43\n    (tmp_dir / \"params.yaml\").dump(params)\n\n    assert dvc.reproduce(stage.addressing) == [stage]\n\n\n@pytest.mark.parametrize(\"multiline\", [True, False])\ndef test_repro_list_of_commands_in_order(tmp_dir, dvc, multiline):\n    cmd = [\"echo foo>foo\", \"echo bar>bar\"]\n    if multiline:\n        cmd = \"\\n\".join(cmd)\n\n    (tmp_dir / \"dvc.yaml\").dump({\"stages\": {\"multi\": {\"cmd\": cmd}}})\n\n    (tmp_dir / \"dvc.yaml\").write_text(\n        dedent(\n            \"\"\"\\\n            stages:\n              multi:\n                cmd:\n                - echo foo>foo\n                - echo bar>bar\n        \"\"\"\n        )\n    )\n    dvc.reproduce(targets=[\"multi\"])\n    assert (tmp_dir / \"foo\").read_text() == \"foo\\n\"\n    assert (tmp_dir / \"bar\").read_text() == \"bar\\n\"\n\n\n@pytest.mark.parametrize(\"multiline\", [True, False])\ndef test_repro_list_of_commands_raise_and_stops_after_failure(tmp_dir, dvc, multiline):\n    cmd = [\"echo foo>foo\", \"failed_command\", \"echo baz>bar\"]\n    if multiline:\n        cmd = \"\\n\".join(cmd)\n\n    (tmp_dir / \"dvc.yaml\").dump({\"stages\": {\"multi\": {\"cmd\": cmd}}})\n\n    with pytest.raises(ReproductionError):\n        dvc.reproduce(targets=[\"multi\"])\n    assert (tmp_dir / \"foo\").read_text() == \"foo\\n\"\n    assert not (tmp_dir / \"bar\").exists()\n\n\ndef test_repro_pulls_missing_data_source(tmp_dir, dvc, mocker, local_remote):\n    (foo,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    
remove(foo.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True)\n\n\ndef test_repro_pulls_missing_import(tmp_dir, dvc, mocker, erepo_dir, local_remote):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo\", commit=\"first\")\n\n    foo_import = dvc.imp(os.fspath(erepo_dir), \"foo\")\n\n    dvc.push()\n\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    remove(foo_import.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True)\n\n\ndef test_repro_allow_missing(tmp_dir, dvc):\n    tmp_dir.gen(\"fixed\", \"fixed\")\n    dvc.stage.add(name=\"create-foo\", cmd=\"echo foo > foo\", deps=[\"fixed\"], outs=[\"foo\"])\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    (create_foo, _) = dvc.reproduce()\n\n    remove(\"foo\")\n    remove(create_foo.outs[0].cache_path)\n    remove(dvc.stage_cache.cache_dir)\n\n    ret = dvc.reproduce(allow_missing=True)\n    # both stages are skipped\n    assert not ret\n\n\ndef test_repro_allow_missing_and_pull(tmp_dir, dvc, mocker, local_remote):\n    tmp_dir.gen(\"fixed\", \"fixed\")\n    dvc.stage.add(name=\"create-foo\", cmd=\"echo foo > foo\", deps=[\"fixed\"], outs=[\"foo\"])\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    (create_foo,) = dvc.reproduce(\"create-foo\")\n\n    dvc.push()\n\n    remove(\"foo\")\n    remove(create_foo.outs[0].cache_path)\n    remove(dvc.stage_cache.cache_dir)\n\n    ret = dvc.reproduce(pull=True, allow_missing=True)\n    # create-foo is skipped ; copy-foo pulls missing dep\n    assert len(ret) == 1\n\n\ndef test_repro_pulls_continue_without_run_cache(tmp_dir, dvc, mocker, local_remote):\n    (foo,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n    mocker.patch.object(\n        dvc.stage_cache, \"pull\", side_effect=RunCacheNotSupported(\"foo\")\n    )\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", 
deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    remove(foo.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True)\n\n\ndef test_repro_skip_pull_if_no_run_cache_is_passed(tmp_dir, dvc, mocker, local_remote):\n    (foo,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n    spy_pull = mocker.spy(dvc.stage_cache, \"pull\")\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    remove(foo.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True, run_cache=False)\n    assert not spy_pull.called\n\n\ndef test_repro_no_commit(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    remove(dvc.cache.local.path)\n    ret = main([\"repro\", stage.addressing, \"--no-commit\"])\n    assert ret == 0\n    # run-cache should be skipped if `-no-commit`.\n    assert not os.path.isdir(dvc.cache.local.path)\n\n\ndef test_repro_all_pipelines(mocker, dvc):\n    stages = [\n        dvc.run(outs=[\"start.txt\"], cmd=\"echo start > start.txt\", name=\"start\"),\n        dvc.run(\n            deps=[\"start.txt\"],\n            outs=[\"middle.txt\"],\n            cmd=\"echo middle > middle.txt\",\n            name=\"middle\",\n        ),\n        dvc.run(\n            deps=[\"middle.txt\"],\n            outs=[\"final.txt\"],\n            cmd=\"echo final > final.txt\",\n            name=\"final\",\n        ),\n        dvc.run(\n            outs=[\"disconnected.txt\"],\n            cmd=\"echo other > disconnected.txt\",\n            name=\"disconnected\",\n        ),\n    ]\n\n    from dvc_data.hashfile.state import StateNoop\n\n    dvc.state = StateNoop()\n\n    mock_reproduce = mocker.patch.object(Stage, \"reproduce\", side_effect=stages)\n    ret = main([\"repro\", \"--all-pipelines\"])\n    assert ret 
== 0\n    assert mock_reproduce.call_count == 4\n\n\nclass TestReproAlreadyCached:\n    def test(self, dvc):\n        stage = dvc.run(\n            always_changed=True,\n            deps=[],\n            outs=[\"datetime.txt\"],\n            cmd='python -c \"import time; print(time.time())\" > datetime.txt',\n            name=\"datetime\",\n        )\n        run_out = stage.outs[0]\n        repro_out = dvc.reproduce(stage.addressing)[0].outs[0]\n\n        assert run_out.hash_info != repro_out.hash_info\n\n    def test_force_with_dependencies(self, tmp_dir, dvc):\n        tmp_dir.dvc_gen(\"foo\", \"foo\")\n        stage = dvc.run(\n            name=\"datetime\",\n            deps=[\"foo\"],\n            outs=[\"datetime.txt\"],\n            cmd='python -c \"import time; print(time.time())\" > datetime.txt',\n        )\n\n        ret = main([\"repro\", \"--force\", stage.addressing])\n        assert ret == 0\n\n        saved_stage = dvc.stage.get_target(stage.addressing)\n        assert stage.outs[0].hash_info != saved_stage.outs[0].hash_info\n\n    def test_force_import(self, mocker, tmp_dir, dvc):\n        from dvc.dependency import base\n\n        tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n        ret = main([\"import-url\", \"foo\", \"bar\"])\n        assert ret == 0\n\n        spy_get = mocker.spy(base, \"fs_download\")\n        spy_checkout = mocker.spy(Output, \"checkout\")\n\n        assert main([\"unfreeze\", \"bar.dvc\"]) == 0\n        ret = main([\"repro\", \"--force\", \"bar.dvc\"])\n        assert ret == 0\n        assert spy_get.call_count == 1\n        assert spy_checkout.call_count == 0\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"not on nt\")\ndef test_repro_shell(tmp_dir, monkeypatch, dvc):\n    monkeypatch.setenv(\"SHELL\", \"/bin/sh\")\n    dvc.run(outs=[\"shell.txt\"], cmd=\"echo $SHELL > shell.txt\", name=\"echo-shell\")\n    shell = os.getenv(\"SHELL\")\n\n    assert (tmp_dir / \"shell.txt\").read_text().rstrip() == shell\n    (tmp_dir / 
\"shell.txt\").unlink()\n\n    dvc.reproduce(\"echo-shell\")\n    assert (tmp_dir / \"shell.txt\").read_text().rstrip() == shell\n\n\ndef test_cmd_repro(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    shutil.copyfile(\"bar\", \"foo\")\n\n    ret = main([\"status\"])\n    assert ret == 0\n\n    ret = main([\"repro\", stage.addressing])\n    assert ret == 0\n\n    ret = main([\"repro\", \"non-existing-file\"])\n    assert ret != 0\n\n\ndef test_repro_dep_under_dir(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"foo\", \"foo\")\n    tmp_dir.dvc_gen(\"data\", {\"file\": \"file\", \"sub\": {\"foo\": \"foo\"}})\n\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"data/file\", \"copy.py\"],\n        cmd=\"python copy.py data/file file1\",\n        name=\"copy-data-file1\",\n    )\n\n    assert filecmp.cmp(\"file1\", \"data/file\", shallow=False)\n\n    os.unlink(\"data/file\")\n    shutil.copyfile(\"foo\", \"data/file\")\n\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 2\n    assert filecmp.cmp(\"file1\", \"foo\", shallow=False)\n\n\ndef test_repro_dep_dir_with_outputs_under_it(\n    tmp_dir,\n    dvc,\n    copy_script,\n):\n    tmp_dir.gen(\"foo\", \"foo\")\n    file_stage, _ = tmp_dir.dvc_gen(\n        {\"data/file\": \"file\", \"data/sub\": {\"foo\": \"foo\", \"bar\": \"bar\"}}\n    )\n    dvc.run(\n        cmd=\"ls data/file data/sub\",\n        deps=[\"data/file\", \"data/sub\"],\n        name=\"list-files\",\n    )\n    copy_stage = dvc.run(\n        deps=[\"data\"],\n        outs=[\"file1\"],\n        cmd=\"python copy.py data file1\",\n        name=\"copy-data-file1\",\n    )\n    os.unlink(\"data/file\")\n    shutil.copyfile(\"foo\", \"data/file\")\n    assert dvc.reproduce(copy_stage.addressing) == 
[file_stage, copy_stage]\n\n\ndef test_repro_force(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    stages = dvc.reproduce(stage.addressing, force=True)\n    assert len(stages) == 2\n\n\ndef test_repro_changed_code(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    with (tmp_dir / \"copy.py\").open(\"a+\", encoding=\"utf8\") as f:\n        f.write(\"\\nshutil.copyfile('bar', sys.argv[2])\")\n    stages = dvc.reproduce(stage.addressing)\n\n    assert filecmp.cmp(\"file1\", \"bar\", shallow=False)\n    assert len(stages) == 1\n\n\ndef test_repro_changed_data(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    shutil.copyfile(\"bar\", \"foo\")\n\n    stages = dvc.reproduce(stage.addressing)\n\n    assert filecmp.cmp(\"file1\", \"bar\", shallow=False)\n    assert len(stages) == 2\n\n\ndef test_repro_dry(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    shutil.copyfile(\"bar\", \"foo\")\n\n    stages = dvc.reproduce(stage.addressing, dry=True)\n\n    assert len(stages) == 2\n    assert not filecmp.cmp(\"file1\", \"bar\", shallow=False)\n\n    ret = main([\"repro\", \"--dry\", stage.addressing])\n    assert ret == 0\n    assert not 
filecmp.cmp(\"file1\", \"bar\", shallow=False)\n\n\ndef test_repro_up_to_date(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    ret = main([\"repro\", stage.addressing])\n    assert ret == 0\n\n\ndef test_repro_dry_no_exec(tmp_dir, dvc):\n    deps = []\n    for d in range(3):\n        idir = f\"idir{d}\"\n        odir = f\"odir{d}\"\n\n        deps.append(\"-d\")\n        deps.append(odir)\n\n        os.mkdir(idir)\n\n        f = os.path.join(idir, \"file\")\n        with open(f, \"w+\", encoding=\"utf-8\") as fobj:\n            fobj.write(str(d))\n\n        ret = main(\n            [\n                \"stage\",\n                \"add\",\n                \"-n\",\n                f\"copy-{idir}-{odir}\",\n                \"-d\",\n                idir,\n                \"-o\",\n                odir,\n                f'python -c \\'import shutil; shutil.copytree(\"{idir}\", \"{odir}\")\\'',\n            ]\n        )\n        assert ret == 0\n\n    ret = main(\n        [\n            \"stage\",\n            \"add\",\n            \"-n\",\n            \"ls\",\n            *deps,\n            \"ls {}\".format(\" \".join(dep for i, dep in enumerate(deps) if i % 2)),\n        ]\n    )\n    assert ret == 0\n\n    ret = main([\"repro\", \"--dry\", \"ls\"])\n    assert ret == 0\n\n\ndef test_repro_changed_deep_data(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    file2_stage = dvc.run(\n        outs=[\"file2\"],\n        deps=[\"file1\", \"copy.py\"],\n        cmd=\"python copy.py file1 file2\",\n        name=\"copy-file-file2\",\n    )\n    
shutil.copyfile(\"bar\", \"foo\")\n    stages = dvc.reproduce(file2_stage.addressing)\n    assert filecmp.cmp(\"file1\", \"bar\", shallow=False)\n    assert filecmp.cmp(\"file2\", \"bar\", shallow=False)\n    assert len(stages) == 3\n\n\ndef test_repro_force_downstream(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stages = dvc.add(\"foo\")\n    assert len(stages) == 1\n    foo_stage = stages[0]\n    assert foo_stage is not None\n\n    shutil.copyfile(\"copy.py\", \"copy1.py\")\n    file1 = \"file1\"\n    file1_stage = dvc.run(\n        outs=[file1],\n        deps=[\"foo\", \"copy1.py\"],\n        cmd=f\"python copy1.py foo {file1}\",\n        name=\"copy-foo-file1\",\n    )\n    assert file1_stage is not None\n\n    shutil.copyfile(\"copy.py\", \"copy2.py\")\n    file2 = \"file2\"\n    file2_stage = dvc.run(\n        outs=[file2],\n        deps=[file1, \"copy2.py\"],\n        cmd=f\"python copy2.py {file1} {file2}\",\n        name=\"copy-file1-file2\",\n    )\n    assert file2_stage is not None\n\n    shutil.copyfile(\"copy.py\", \"copy3.py\")\n    file3 = \"file3\"\n    file3_stage = dvc.run(\n        outs=[file3],\n        deps=[file2, \"copy3.py\"],\n        cmd=f\"python copy3.py {file2} {file3}\",\n        name=\"copy-file2-file3\",\n    )\n    assert file3_stage is not None\n\n    with open(\"copy2.py\", \"a\", encoding=\"utf-8\") as fobj:\n        fobj.write(\"\\n\\n\")\n\n    stages = dvc.reproduce(file3_stage.addressing, force_downstream=True)\n    assert len(stages) == 2\n    assert stages[0].addressing == file2_stage.addressing\n    assert stages[1].addressing == file3_stage.addressing\n\n\ndef test_repro_force_downstream_do_not_force_independent_stages(tmp_dir, dvc, run_copy):\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    foo1 = run_copy(\"foo\", \"foo1\", name=\"foo1\")\n    foo2 = run_copy(\"foo1\", \"foo2\", name=\"foo2\")\n    run_copy(\"bar\", \"bar1\", name=\"bar1\")\n    run_copy(\"bar1\", \"bar2\", 
name=\"bar2\")\n    cat = dvc.run(cmd=\"cat bar2 foo2\", deps=[\"foo2\", \"bar2\"], name=\"cat\")\n\n    tmp_dir.gen(\"foo\", \"foobar\")\n    assert dvc.reproduce(force_downstream=True) == [foo1, foo2, cat]\n\n\ndef test_repro_pipeline(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    stage = dvc.run(\n        outs=[\"file2\"],\n        deps=[\"file1\", \"copy.py\"],\n        cmd=\"python copy.py file1 file2\",\n        name=\"copy-file-file2\",\n    )\n    stages = dvc.reproduce(stage.addressing, force=True, pipeline=True)\n    assert len(stages) == 3\n\n\ndef test_repro_pipeline_cli(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    ret = main([\"repro\", \"--pipeline\", \"-f\", stage.addressing])\n    assert ret == 0\n\n\ndef test_repro_pipelines(tmp_dir, dvc, copy_script):\n    foo_stage, bar_stage = tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    file1_stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy-FOO-file1\",\n    )\n    file2_stage = dvc.run(\n        outs=[\"file2\"],\n        deps=[\"bar\", \"copy.py\"],\n        cmd=\"python copy.py bar file2\",\n        name=\"copy-BAR-file2\",\n    )\n    assert set(dvc.reproduce(all_pipelines=True, force=True)) == {\n        foo_stage,\n        bar_stage,\n        file1_stage,\n        file2_stage,\n    }\n\n\ndef test_repro_pipelines_cli(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", 
\"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy-FOO-file1\",\n    )\n    dvc.run(\n        outs=[\"file2\"],\n        deps=[\"bar\", \"copy.py\"],\n        cmd=\"python copy.py bar file2\",\n        name=\"copy-BAR-file2\",\n    )\n    assert main([\"repro\", \"-f\", \"-P\"]) == 0\n\n\n@pytest.mark.parametrize(\n    \"target\",\n    [\n        \"Dvcfile\",\n        \"pipelines.yaml\",\n        \"pipelines.yaml:name\",\n        \"Dvcfile:name\",\n        \"stage.dvc\",\n        \"stage.dvc:name\",\n        \"not-existing-stage.json\",\n    ],\n)\ndef test_freeze_non_existing(dvc, target):\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.freeze(target)\n\n    ret = main([\"freeze\", target])\n    assert ret != 0\n\n\ndef test_repro_frozen_callback(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"foo\", \"foo\")\n    # NOTE: purposefully not specifying deps or outs\n    # to create a callback stage.\n    stage = dvc.run(cmd=\"python copy.py foo file1\", name=\"copy-FOO-file1\")\n\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 1\n\n    dvc.freeze(stage.addressing)\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 0\n\n    dvc.unfreeze(stage.addressing)\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 1\n\n\ndef test_repro_frozen_unchanged(tmp_dir, dvc, copy_script):\n    \"\"\"\n    Check that freezing/unfreezing doesn't affect stage state\n    \"\"\"\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    target = stage.addressing\n    dvc.freeze(target)\n    stages = dvc.reproduce(target)\n    assert len(stages) == 0\n\n    dvc.unfreeze(target)\n    stages = dvc.reproduce(target)\n    assert len(stages) == 0\n\n\ndef test_repro_metrics_add_unchanged(tmp_dir, dvc, 
copy_script):\n    \"\"\"\n    Check that adding/removing metrics doesn't affect stage state\n    \"\"\"\n    tmp_dir.gen(\"foo\", \"foo\")\n    stages = dvc.add(\"foo\")\n    assert len(stages) == 1\n    assert stages[0] is not None\n\n    dvc.run(\n        outs_no_cache=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy\",\n    )\n\n    stages = dvc.reproduce(\"copy\")\n    assert len(stages) == 0\n\n    dvc.stage.add(\n        metrics_no_cache=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy\",\n        force=True,\n    )\n\n    stages = dvc.reproduce(\"copy\")\n    assert len(stages) == 0\n\n    dvc.stage.add(\n        outs_no_cache=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy\",\n        force=True,\n    )\n\n    stages = dvc.reproduce(\"copy\")\n    assert len(stages) == 0\n\n\ndef test_repro_phony(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    stage = dvc.run(cmd=\"cat file1\", deps=[\"file1\"], name=\"cat\")\n    shutil.copyfile(\"bar\", \"foo\")\n\n    dvc.reproduce(stage.addressing)\n\n    assert filecmp.cmp(\"file1\", \"bar\", shallow=False)\n\n\ndef test_non_existing_output(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    os.unlink(\"foo\")\n\n    with pytest.raises(ReproductionError):\n        dvc.reproduce(stage.addressing)\n\n\ndef test_repro_data_source(tmp_dir, dvc, copy_script):\n    tmp_dir.gen(\"bar\", \"bar\")\n    
tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"run1\",\n    )\n    shutil.copyfile(\"bar\", \"foo\")\n\n    stages = dvc.reproduce(stage.addressing)\n\n    assert filecmp.cmp(\"foo\", \"bar\", shallow=False)\n    assert stages[0].outs[0].hash_info.value == file_md5(\"bar\")\n\n\ndef test_repro_changed_dir(tmp_dir, dvc, copy_script):\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    shutil.copyfile(\"foo\", \"file\")\n\n    stage = dvc.run(\n        outs=[\"dir\"],\n        deps=[\"file\", \"copy.py\"],\n        cmd=\"mkdir dir && python copy.py foo dir/foo\",\n        name=\"copy-in-dir\",\n    )\n\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 0\n\n    os.unlink(\"file\")\n    shutil.copyfile(\"bar\", \"file\")\n\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 1\n\n\ndef test_repro_changed_dir_data(tmp_dir, dvc, copy_script):\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\"}, \"bar\": \"bar\"})\n    stage = dvc.run(\n        outs=[\"dir\"],\n        deps=[\"data\", \"copy.py\"],\n        cmd=\"python copy.py data dir\",\n        name=\"copy-dir\",\n    )\n\n    assert not dvc.reproduce(stage.addressing)\n\n    with (tmp_dir / \"data\" / \"foo\").open(\"a\", encoding=\"utf-8\") as f:\n        f.write(\"add\")\n\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 1\n\n    # Check that dvc indeed registers changed output dir\n    shutil.move(\"bar\", \"dir\")\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 1\n\n    file = os.path.join(\"data\", \"foo\")\n    # Check that dvc registers mtime change for the directory.\n    system.hardlink(file, file + \".lnk\")\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 1\n\n\ndef test_repro_missing_lock_info(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen(\"foo\", 
\"foo\")\n    stage = dvc.stage.add(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy-foo-file1\",\n    )\n\n    stages = dvc.reproduce(stage.addressing)\n    assert len(stages) == 1\n\n\ndef test_repro_rm_recursive(tmp_dir, dvc):\n    # check that dir output recursively removes files in the dir\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\"}})\n    dvc.stage.add(name=\"dir\", cmd=\"mkdir dir\", outs=[\"dir\"])\n    dvc.reproduce()\n    assert (tmp_dir / \"dir\").exists()\n    assert not (tmp_dir / \"dir\" / \"foo\").exists()\n\n\ndef test_repro_single_item_with_multiple_targets(tmp_dir, dvc, copy_script):\n    stage1 = dvc.stage.add(cmd=\"echo foo > foo\", outs=[\"foo\"], name=\"gen-foo\")\n    with dvc.lock:\n        stage1.run()\n\n    stage2 = dvc.stage.add(\n        cmd=\"python copy.py foo bar\", deps=[\"foo\"], outs=[\"bar\"], name=\"copy-foo-bar\"\n    )\n    assert dvc.reproduce([\"copy-foo-bar\", \"gen-foo\"], single_item=True) == [\n        stage2,\n        stage1,\n    ]\n\n\ndef test_repro_keep_going(mocker, tmp_dir, dvc, copy_script):\n    from dvc.repo import reproduce\n\n    (bar_stage, foo_stage) = tmp_dir.dvc_gen({\"bar\": \"bar\", \"foo\": \"foo\"})\n    stage1 = dvc.stage.add(\n        cmd=[\"python copy.py bar foobar\", \"exit 1\"],\n        deps=[\"bar\"],\n        outs=[\"foobar\"],\n        name=\"copy-bar-foobar\",\n    )\n    dvc.stage.add(cmd=\"cat foobar foo\", deps=[\"foobar\", \"foo\"], name=\"cat\")\n    spy = mocker.spy(reproduce, \"_reproduce_stage\")\n\n    with pytest.raises(ReproductionError):\n        dvc.reproduce(on_error=\"keep-going\", repro_fn=spy)\n\n    bar_call = mocker.call(bar_stage, upstream=[], force=False, interactive=False)\n    stage1_call = mocker.call(\n        stage1, upstream=[bar_stage], force=False, interactive=False\n    )\n    foo_call = mocker.call(foo_stage, upstream=[], force=False, interactive=False)\n    assert 
len(spy.call_args_list) == 3\n    assert foo_call in spy.call_args_list\n    assert bar_call in spy.call_args_list\n    assert stage1_call in spy.call_args_list\n\n\ndef test_repro_ignore_errors(mocker, tmp_dir, dvc, copy_script):\n    from dvc.repo import reproduce\n\n    (bar_stage, foo_stage) = tmp_dir.dvc_gen({\"bar\": \"bar\", \"foo\": \"foo\"})\n    stage1 = dvc.stage.add(\n        cmd=[\"python copy.py bar foobar\", \"exit 1\"],\n        deps=[\"bar\"],\n        outs=[\"foobar\"],\n        name=\"copy-bar-foobar\",\n    )\n    stage2 = dvc.stage.add(cmd=\"cat foobar foo\", deps=[\"foobar\", \"foo\"], name=\"cat\")\n    spy = mocker.spy(reproduce, \"_reproduce_stage\")\n    dvc.reproduce(on_error=\"ignore\", repro_fn=spy)\n\n    bar_call = mocker.call(bar_stage, upstream=[], force=False, interactive=False)\n    foo_call = mocker.call(foo_stage, upstream=[], force=False, interactive=False)\n    stage1_call = mocker.call(\n        stage1, upstream=[bar_stage], force=False, interactive=False\n    )\n    stage2_call = mocker.call(\n        stage2,\n        upstream=[foo_stage, stage1],\n        force=False,\n        interactive=False,\n    )\n    assert len(spy.call_args_list) == 4\n    assert foo_call in spy.call_args_list\n    assert bar_call in spy.call_args_list\n    assert stage1_call in spy.call_args_list\n    assert stage2_call in spy.call_args_list\n\n\n@pytest.mark.parametrize(\"persist\", [True, False])\ndef test_repro_external_outputs(tmp_dir, dvc, local_workspace, persist):\n    local_workspace.gen(\"foo\", \"foo\")\n    foo_path = str(local_workspace / \"foo\")\n    bar_path = str(local_workspace / \"bar\")\n    outs = {\"outs_no_cache\": [bar_path]}\n    if persist:\n        outs = {\"outs_persist_no_cache\": [bar_path]}\n    dvc.run(\n        name=\"mystage\",\n        cmd=f\"cp {foo_path} {bar_path}\",\n        deps=[foo_path],\n        no_exec=True,\n        **outs,\n    )\n\n    dvc.reproduce()\n    dvc.reproduce(force=True)\n\n    assert 
(local_workspace / \"foo\").read_text() == \"foo\"\n    assert (local_workspace / \"bar\").read_text() == \"foo\"\n    assert not (local_workspace / \"cache\").exists()\n"
  },
  {
    "path": "tests/func/repro/test_repro_allow_missing.py",
    "content": "from dvc.utils.fs import remove\n\n\ndef test_repro_allow_missing(tmp_dir, dvc):\n    tmp_dir.gen(\"fixed\", \"fixed\")\n    dvc.stage.add(name=\"create-foo\", cmd=\"echo foo > foo\", deps=[\"fixed\"], outs=[\"foo\"])\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    (create_foo, _) = dvc.reproduce()\n\n    remove(\"foo\")\n    remove(create_foo.outs[0].cache_path)\n    remove(dvc.stage_cache.cache_dir)\n\n    ret = dvc.reproduce(allow_missing=True)\n    # both stages are skipped\n    assert not ret\n\n\ndef test_repro_allow_missing_and_pull(tmp_dir, dvc, mocker, local_remote):\n    tmp_dir.gen(\"fixed\", \"fixed\")\n    dvc.stage.add(name=\"create-foo\", cmd=\"echo foo > foo\", deps=[\"fixed\"], outs=[\"foo\"])\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    (create_foo,) = dvc.reproduce(\"create-foo\")\n\n    dvc.push()\n\n    remove(\"foo\")\n    remove(create_foo.outs[0].cache_path)\n    remove(dvc.stage_cache.cache_dir)\n\n    ret = dvc.reproduce(pull=True, allow_missing=True)\n    # create-foo is skipped ; copy-foo pulls missing dep\n    assert len(ret) == 1\n\n\ndef test_repro_allow_missing_upstream_stage_modified(\n    tmp_dir, dvc, mocker, local_remote\n):\n    \"\"\"https://github.com/treeverse/dvc/issues/9530\"\"\"\n    tmp_dir.gen(\"params.yaml\", \"param: 1\")\n    dvc.stage.add(\n        name=\"create-foo\", cmd=\"echo ${param} > foo\", params=[\"param\"], outs=[\"foo\"]\n    )\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    dvc.reproduce()\n\n    dvc.push()\n\n    tmp_dir.gen(\"params.yaml\", \"param: 2\")\n    (create_foo,) = dvc.reproduce(\"create-foo\")\n    dvc.push()\n    remove(\"foo\")\n    remove(create_foo.outs[0].cache_path)\n\n    ret = dvc.reproduce(pull=True, allow_missing=True)\n    # create-foo is skipped ; copy-foo pulls modified dep\n    assert len(ret) == 1\n\n\ndef 
test_repro_allow_missing_cached(tmp_dir, dvc):\n    tmp_dir.gen(\"fixed\", \"fixed\")\n    dvc.stage.add(name=\"create-foo\", cmd=\"echo foo > foo\", deps=[\"fixed\"], outs=[\"foo\"])\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    dvc.reproduce()\n\n    remove(\"foo\")\n\n    ret = dvc.reproduce(allow_missing=True)\n    # both stages are skipped\n    assert not ret\n"
  },
  {
    "path": "tests/func/repro/test_repro_pull.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.stage.cache import RunCacheNotSupported\nfrom dvc.utils.fs import remove\n\n\ndef test_repro_pulls_missing_data_source(tmp_dir, dvc, mocker, local_remote):\n    (foo,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    remove(foo.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True)\n\n\ndef test_repro_pulls_missing_import(tmp_dir, dvc, mocker, erepo_dir, local_remote):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo\", commit=\"first\")\n\n    foo_import = dvc.imp(os.fspath(erepo_dir), \"foo\")\n\n    dvc.push()\n\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    remove(foo_import.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True)\n\n\ndef test_repro_pulls_continue_without_run_cache(tmp_dir, dvc, mocker, local_remote):\n    (foo,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n    mocker.patch.object(\n        dvc.stage_cache, \"pull\", side_effect=RunCacheNotSupported(\"foo\")\n    )\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    remove(foo.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True)\n\n\ndef test_repro_skip_pull_if_no_run_cache_is_passed(tmp_dir, dvc, mocker, local_remote):\n    (foo,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n\n    assert dvc.reproduce(pull=True)\n\n    remove(\"foo\")\n    remove(foo.outs[0].cache_path)\n    remove(\"dvc.lock\")\n    spy_pull = mocker.spy(dvc.stage_cache, \"pull\")\n    assert dvc.reproduce(pull=True, run_cache=False)\n    assert not spy_pull.called\n\n\ndef test_repro_skip_pull_if_single_item_is_passed(tmp_dir, dvc, mocker, local_remote):\n    (foo,) = 
tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    remove(\"foo\")\n    remove(foo.outs[0].cache_path)\n\n    assert dvc.reproduce(pull=True, single_item=True)\n\n\ndef test_repro_pulls_persisted_output(tmp_dir, dvc, mocker, local_remote):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs_persist=[\"bar\"])\n    dvc.reproduce()\n    remove(\"bar\")\n\n    # stage is skipped\n    assert not dvc.reproduce(pull=True)\n\n\n@pytest.mark.parametrize(\"allow_missing\", [True, False])\ndef test_repro_pulls_allow_missing(tmp_dir, dvc, mocker, local_remote, allow_missing):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.push()\n\n    dvc.stage.add(name=\"copy-foo\", cmd=\"cp foo bar\", deps=[\"foo\"], outs=[\"bar\"])\n    dvc.reproduce()\n    remove(\"foo\")\n\n    # stage is skipped\n    assert not dvc.reproduce(pull=True, allow_missing=allow_missing)\n    # data only pulled if allow_missing is false\n    assert (tmp_dir / \"foo\").exists() != allow_missing\n\n\ndef test_repro_pull_fails(tmp_dir, dvc, mocker, local_remote):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc.stage.add(\n        name=\"concat-foo\", cmd=\"cat foo foo > bar\", deps=[\"foo\"], outs=[\"bar\"]\n    )\n    stages = dvc.reproduce()\n    remove(\"bar\")\n    remove(stages[0].outs[0].cache_path)\n\n    # stage is run\n    assert dvc.reproduce(pull=True)\n"
  },
  {
    "path": "tests/func/test_add.py",
    "content": "import errno\nimport filecmp\nimport os\nimport shutil\nimport stat\nimport textwrap\n\nimport pytest\n\nimport dvc_data\nfrom dvc.cachemgr import CacheManager\nfrom dvc.cli import main\nfrom dvc.config import ConfigError\nfrom dvc.dvcfile import DVC_FILE_SUFFIX\nfrom dvc.exceptions import (\n    DvcException,\n    OutputDuplicationError,\n    OverlappingOutputPathsError,\n)\nfrom dvc.fs import LocalFileSystem, system\nfrom dvc.output import (\n    OutputAlreadyTrackedError,\n    OutputDoesNotExistError,\n    OutputIsStageFileError,\n)\nfrom dvc.stage import Stage\nfrom dvc.stage.exceptions import StageExternalOutputsError, StagePathNotFoundError\nfrom dvc.utils.fs import path_isin\nfrom dvc.utils.serialize import YAMLFileCorruptedError, dump_yaml\nfrom dvc_data.hashfile.hash import file_md5\nfrom dvc_data.hashfile.hash_info import HashInfo\nfrom tests.utils import get_gitignore_content\n\n\ndef test_add(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    (stage,) = dvc.add(\"foo\")\n    md5 = file_md5(\"foo\", dvc.fs)\n\n    assert stage is not None\n\n    assert isinstance(stage, Stage)\n    assert os.path.isfile(stage.path)\n    assert len(stage.outs) == 1\n    assert len(stage.deps) == 0\n    assert stage.cmd is None\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", md5)\n    assert stage.md5 is None\n\n    assert (tmp_dir / \"foo.dvc\").parse() == {\n        \"outs\": [\n            {\n                \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"path\": \"foo\",\n                \"size\": 3,\n                \"hash\": \"md5\",\n            }\n        ]\n    }\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"can't set exec bit on Windows\")\ndef test_add_executable(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    st = os.stat(\"foo\")\n    os.chmod(\"foo\", st.st_mode | stat.S_IEXEC)\n    dvc.add(\"foo\")\n\n    assert (tmp_dir / \"foo.dvc\").parse() == {\n        \"outs\": [\n            {\n              
  \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"path\": \"foo\",\n                \"size\": 3,\n                \"isexec\": True,\n                \"hash\": \"md5\",\n            }\n        ]\n    }\n    assert os.stat(\"foo\").st_mode & stat.S_IEXEC\n\n\ndef test_add_unicode(tmp_dir, dvc):\n    with open(\"\\xe1\", \"wb\", encoding=None) as fd:\n        fd.write(b\"something\")\n\n    (stage,) = dvc.add(\"\\xe1\")\n\n    assert os.path.isfile(stage.path)\n\n\ndef test_add_unsupported_file(dvc):\n    with pytest.raises(ConfigError, match=\"Unsupported URL type\"):\n        dvc.add(\"unsupported://unsupported\")\n\n\ndef test_add_directory(tmp_dir, dvc):\n    from dvc_data.hashfile import load\n\n    (stage,) = tmp_dir.dvc_gen({\"dir\": {\"file\": \"file\"}})\n\n    assert stage is not None\n    assert len(stage.deps) == 0\n    assert len(stage.outs) == 1\n\n    hash_info = stage.outs[0].hash_info\n\n    obj = load(dvc.cache.local, hash_info)\n    for key, _, _ in obj:\n        for part in key:\n            assert \"\\\\\" not in part\n\n\ndef test_add_directory_with_forward_slash(tmp_dir, dvc):\n    tmp_dir.gen(\"directory\", {\"file\": \"file\"})\n    (stage,) = dvc.add(\"directory/\")\n    assert stage.relpath == \"directory.dvc\"\n\n\ndef test_add_tracked_file(tmp_dir, scm, dvc):\n    path = \"tracked_file\"\n    tmp_dir.scm_gen(path, \"...\", commit=\"add tracked file\")\n    msg = f\"\"\" output '{path}' is already tracked by SCM \\\\(e.g. 
Git\\\\).\n    You can remove it from Git, then add to DVC.\n        To stop tracking from Git:\n            git rm -r --cached '{path}'\n            git commit -m \"stop tracking {path}\" \"\"\"\n\n    with pytest.raises(OutputAlreadyTrackedError, match=msg):\n        dvc.add(path)\n\n\ndef test_add_dir_with_existing_cache(tmp_dir, dvc):\n    tmp_dir.gen({\"foo\": \"foo\", \"dir\": {\"file\": \"foo\"}})\n\n    (stage,) = dvc.add(\"foo\")\n    assert stage is not None\n    (stage,) = dvc.add(\"dir\")\n    assert stage is not None\n\n\ndef test_add_modified_dir(tmp_dir, dvc):\n    tmp_dir.gen(\"data\", {\"foo\": \"foo\", \"sub\": {\"bar\": \"bar\"}})\n    (stage,) = dvc.add(\"data\")\n    assert stage is not None\n\n    (tmp_dir / \"data\" / \"foo\").unlink()\n    (stage,) = dvc.add(\"data\")\n    assert stage is not None\n\n\ndef test_add_file_in_dir(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"subdir\": {\"subdata\": \"subdata content\"}}})\n    subdir_path = os.path.join(\"dir\", \"subdir\", \"subdata\")\n\n    (stage,) = dvc.add(subdir_path)\n\n    assert stage is not None\n    assert len(stage.deps) == 0\n    assert len(stage.outs) == 1\n    assert stage.relpath == subdir_path + \".dvc\"\n\n    # Current dir should not be taken into account\n    assert stage.wdir == os.path.dirname(stage.path)\n    assert stage.outs[0].def_path == \"subdata\"\n\n\n@pytest.mark.parametrize(\n    \"target, expected_def_paths, expected_rel_paths\",\n    [\n        (\n            os.path.join(\"dir\", \"subdir\", \"subdata*\"),\n            [\"subdata\", \"subdata123\"],\n            [\n                os.path.join(\"dir\", \"subdir\", \"subdata\") + \".dvc\",\n                os.path.join(\"dir\", \"subdir\", \"subdata123\") + \".dvc\",\n            ],\n        ),\n        (\n            os.path.join(\"dir\", \"subdir\", \"?subdata\"),\n            [\"esubdata\", \"isubdata\"],\n            [\n                os.path.join(\"dir\", \"subdir\", \"esubdata\") + \".dvc\",\n            
    os.path.join(\"dir\", \"subdir\", \"isubdata\") + \".dvc\",\n            ],\n        ),\n        (\n            os.path.join(\"dir\", \"subdir\", \"[aiou]subdata\"),\n            [\"isubdata\"],\n            [os.path.join(\"dir\", \"subdir\", \"isubdata\") + \".dvc\"],\n        ),\n        (\n            os.path.join(\"dir\", \"**\", \"subdata*\"),\n            [\"subdata\", \"subdata123\", \"subdata4\", \"subdata5\"],\n            [\n                os.path.join(\"dir\", \"subdir\", \"subdata\") + \".dvc\",\n                os.path.join(\"dir\", \"subdir\", \"subdata123\") + \".dvc\",\n                os.path.join(\"dir\", \"anotherdir\", \"subdata4\") + \".dvc\",\n                os.path.join(\"dir\", \"subdata5\") + \".dvc\",\n            ],\n        ),\n    ],\n)\ndef test_add_filtered_files_in_dir(\n    tmp_dir, dvc, target, expected_def_paths, expected_rel_paths\n):\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"subdir\": {\n                    \"subdata\": \"subdata content\",\n                    \"esubdata\": \"extra subdata content\",\n                    \"isubdata\": \"i subdata content\",\n                    \"subdata123\": \"subdata content 123\",\n                },\n                \"anotherdir\": {\n                    \"subdata4\": \"subdata 4 content\",\n                    \"esubdata\": \"extra 2 subdata content\",\n                },\n                \"subdata5\": \"subdata 5 content\",\n            }\n        }\n    )\n\n    stages = dvc.add(target, glob=True)\n\n    assert len(stages) == len(expected_def_paths)\n    for stage in stages:\n        assert stage is not None\n        assert len(stage.deps) == 0\n        assert len(stage.outs) == 1\n        assert stage.relpath in expected_rel_paths\n\n        # Current dir should not be taken into account\n        assert stage.wdir == os.path.dirname(stage.path)\n        assert stage.outs[0].def_path in expected_def_paths\n\n\ndef test_cmd_add(tmp_dir, dvc):\n    
tmp_dir.gen(\"foo\", \"foo\")\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"non-existing-file\"])\n    assert ret != 0\n\n\ndef test_double_add_unchanged_file(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n\n\ndef test_double_add_unchanged_dir(tmp_dir, dvc):\n    tmp_dir.gen(\"data\", {\"foo\": \"foo\"})\n    ret = main([\"add\", \"data\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"data\"])\n    assert ret == 0\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"unsupported on Windows\")\ndef test_add_colon_in_filename(tmp_dir, dvc):\n    tmp_dir.gen(\"fo:o\", \"foo\")\n    ret = main([\"add\", \"fo:o\"])\n    assert ret == 0\n\n\ndef test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir):\n    file_md5_counter = mocker.spy(dvc_data.hashfile.hash, \"file_md5\")\n    tmp_dir.gen(\"foo\", \"foo\")\n\n    ret = main([\"config\", \"cache.type\", \"copy\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n    assert file_md5_counter.mock.call_count == 1\n\n    ret = main([\"status\"])\n    assert ret == 0\n    assert file_md5_counter.mock.call_count == 1\n\n    os.rename(\"foo\", \"foo.back\")\n    ret = main([\"checkout\"])\n    assert ret == 0\n    assert file_md5_counter.mock.call_count == 2\n\n    ret = main([\"status\"])\n    assert ret == 0\n    assert file_md5_counter.mock.call_count == 2\n\n\ndef test_should_update_state_entry_for_directory_after_add(mocker, dvc, tmp_dir):\n    file_md5_counter = mocker.spy(dvc_data.hashfile.hash, \"file_md5\")\n\n    tmp_dir.gen({\"data/data\": \"foo\", \"data/data_sub/sub_data\": \"foo\"})\n\n    ret = main([\"config\", \"cache.type\", \"copy\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"data\"])\n    assert ret == 0\n    assert file_md5_counter.mock.call_count == 3\n\n    ret = main([\"status\"])\n    
assert ret == 0\n    assert file_md5_counter.mock.call_count == 4\n\n    os.rename(\"data\", \"data.back\")\n    ret = main([\"checkout\"])\n    assert ret == 0\n    assert file_md5_counter.mock.call_count == 5\n\n    ret = main([\"status\"])\n    assert ret == 0\n    assert file_md5_counter.mock.call_count == 6\n\n\ndef test_add_commit(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    ret = main([\"add\", \"foo\", \"--no-commit\"])\n    assert ret == 0\n    assert os.path.isfile(\"foo\")\n    assert not os.path.exists(dvc.cache.local.path)\n\n    ret = main([\"commit\", \"foo.dvc\"])\n    assert ret == 0\n    assert os.path.isfile(\"foo\")\n    assert dvc.cache.local.exists(\"acbd18db4cc2f85cedef654fccc4a4d8\")\n\n\ndef test_should_collect_dir_cache_only_once(mocker, tmp_dir, dvc):\n    tmp_dir.gen({\"data/data\": \"foo\"})\n    counter = mocker.spy(dvc_data.hashfile.build, \"_build_tree\")\n    ret = main([\"add\", \"data\"])\n    assert ret == 0\n    assert counter.mock.call_count == 1\n\n    ret = main([\"status\"])\n    assert ret == 0\n    assert counter.mock.call_count == 2\n\n    ret = main([\"status\"])\n    assert ret == 0\n    assert counter.mock.call_count == 3\n\n\ndef test_should_place_stage_in_data_dir_if_repository_below_symlink(\n    mocker, tmp_dir, dvc\n):\n    def is_symlink_true_below_dvc_root(path):\n        return path == os.path.dirname(dvc.root_dir)\n\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\"}})\n    mocker.patch.object(\n        system, \"is_symlink\", side_effect=is_symlink_true_below_dvc_root\n    )\n    ret = main([\"add\", os.path.join(\"data\", \"foo\")])\n    assert ret == 0\n\n    assert not (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / \"data\" / \"foo.dvc\").exists()\n\n\ndef test_should_throw_proper_exception_on_corrupted_stage_file(caplog, tmp_dir, dvc):\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \" bar\"})\n    assert main([\"add\", \"foo\"]) == 0\n\n    with (tmp_dir / \"foo.dvc\").open(\"a+\") as f:\n     
   f.write(\"this will break yaml file structure\")\n\n    caplog.clear()\n    assert main([\"add\", \"bar\"]) == 1\n    expected_error = \"unable to read: 'foo.dvc', YAML file structure is corrupted\"\n    assert expected_error in caplog.text\n\n\ndef test_should_throw_proper_exception_on_existing_out(caplog, tmp_dir, dvc):\n    tmp_dir.gen({\"foo\": \"foo\"})\n    (tmp_dir / \"out\").write_text(\"old contents\")\n\n    assert main([\"add\", \"foo\", \"--out\", \"out\"]) == 1\n\n    assert (tmp_dir / \"out\").read_text() == \"old contents\"\n    expected_error_lines = [\n        \"Error: The file 'out' already exists locally.\",\n        \"To override it, re-run with '--force'.\",\n    ]\n    assert all(line in caplog.text for line in expected_error_lines)\n\n\ndef test_add_force_overwrite_out(caplog, tmp_dir, dvc):\n    tmp_dir.gen({\"foo\": \"foo\"})\n    (tmp_dir / \"out\").write_text(\"old contents\")\n\n    assert main([\"add\", \"foo\", \"--out\", \"out\", \"--force\"]) == 0\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n\n\ndef test_failed_add_cleanup(tmp_dir, scm, dvc):\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n\n    # Add and corrupt a stage file\n    dvc.add(\"foo\")\n    tmp_dir.gen(\"foo.dvc\", \"- broken\\nyaml\")\n\n    with pytest.raises(YAMLFileCorruptedError):\n        dvc.add(\"bar\")\n\n    assert not os.path.exists(\"bar.dvc\")\n\n    gitignore_content = get_gitignore_content()\n    assert \"/bar\" not in gitignore_content\n\n\ndef test_add_unprotected(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    ret = main([\"config\", \"cache.type\", \"hardlink\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n\n    assert not os.access(\"foo\", os.W_OK)\n    assert system.is_hardlink(\"foo\")\n\n    ret = main([\"unprotect\", \"foo\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n\n    assert not os.access(\"foo\", os.W_OK)\n    assert 
system.is_hardlink(\"foo\")\n\n\n@pytest.fixture\ndef temporary_windows_drive(tmp_path_factory):\n    import string\n    from ctypes import windll\n\n    try:\n        import win32api\n        from win32con import DDD_REMOVE_DEFINITION\n    except ImportError:\n        pytest.skip(\"pywin32 not installed\")\n\n    drives = [\n        s[0].upper()\n        for s in win32api.GetLogicalDriveStrings().split(\"\\000\")\n        if len(s) > 0\n    ]\n\n    new_drive_name = next(\n        letter for letter in string.ascii_uppercase if letter not in drives\n    )\n    new_drive = f\"{new_drive_name}:\"\n\n    target_path = tmp_path_factory.mktemp(\"tmp_windows_drive\")\n\n    set_up_result = windll.kernel32.DefineDosDeviceW(\n        0, new_drive, os.fspath(target_path)\n    )\n    if set_up_result == 0:\n        raise RuntimeError(\"Failed to mount windows drive!\")\n\n    # NOTE: new_drive has form of `A:` and joining it with some relative\n    # path might result in non-existing path (A:path\\\\to)\n    yield os.path.join(new_drive, os.sep)\n\n    tear_down_result = windll.kernel32.DefineDosDeviceW(\n        DDD_REMOVE_DEFINITION, new_drive, os.fspath(target_path)\n    )\n    if tear_down_result == 0:\n        raise RuntimeError(\"Could not unmount windows drive!\")\n\n\n@pytest.mark.skipif(os.name != \"nt\", reason=\"Windows specific\")\ndef test_windows_should_add_when_cache_on_different_drive(\n    tmp_dir, dvc, temporary_windows_drive\n):\n    dvc.config[\"cache\"][\"dir\"] = temporary_windows_drive\n    dvc.cache = CacheManager(dvc)\n\n    (stage,) = tmp_dir.dvc_gen({\"file\": \"file\"})\n    cache_path = stage.outs[0].cache_path\n\n    assert path_isin(cache_path, temporary_windows_drive)\n    assert os.path.isfile(cache_path)\n    filecmp.cmp(\"file\", cache_path)\n\n\ndef test_readding_dir_should_not_unprotect_all(tmp_dir, dvc, mocker):\n    tmp_dir.gen(\"dir/data\", \"data\")\n\n    dvc.cache.local.cache_types = [\"symlink\"]\n\n    dvc.add(\"dir\")\n    
tmp_dir.gen(\"dir/new_file\", \"new_file_content\")\n\n    unprotect_spy = mocker.spy(dvc.cache.local, \"unprotect\")\n    dvc.add(\"dir\")\n\n    assert not unprotect_spy.mock.called\n    assert system.is_symlink(os.path.join(\"dir\", \"new_file\"))\n\n\ndef test_should_not_checkout_when_adding_cached_copy(tmp_dir, dvc, mocker):\n    dvc.cache.local.cache_types = [\"copy\"]\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n\n    shutil.copy(\"bar\", \"foo\")\n\n    copy_spy = mocker.spy(dvc.cache.local.fs, \"copy\")\n\n    dvc.add(\"foo\")\n\n    assert copy_spy.mock.call_count == 0\n\n\n@pytest.mark.parametrize(\n    \"link,new_link,link_test_func\",\n    [\n        (\"hardlink\", \"copy\", lambda path: not system.is_hardlink(path)),\n        (\"symlink\", \"copy\", lambda path: not system.is_symlink(path)),\n        (\"copy\", \"hardlink\", system.is_hardlink),\n        (\"copy\", \"symlink\", system.is_symlink),\n    ],\n)\ndef test_should_relink_on_repeated_add(link, new_link, link_test_func, tmp_dir, dvc):\n    dvc.config[\"cache\"][\"type\"] = link\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n\n    os.remove(\"foo\")\n    getattr(dvc.cache.local.fs, link)(\n        (tmp_dir / \"bar\").fs_path, (tmp_dir / \"foo\").fs_path\n    )\n\n    dvc.cache.local.cache_types = [new_link]\n\n    dvc.add(\"foo\")\n\n    assert link_test_func(\"foo\")\n\n\n@pytest.mark.parametrize(\"link\", [\"hardlink\", \"symlink\", \"copy\"])\ndef test_should_protect_on_repeated_add(link, tmp_dir, dvc):\n    dvc.cache.local.cache_types = [link]\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\"})\n\n    dvc.unprotect(\"foo\")\n\n    dvc.add(\"foo\")\n\n    assert not os.access(\n        os.path.join(\".dvc\", \"cache\", \"ac\", \"bd18db4cc2f85cedef654fccc4a4d8\"),\n        os.W_OK,\n    )\n\n    # NOTE: Windows symlink perms don't propagate to the target\n    if link == \"copy\" or (link == \"symlink\" and os.name == \"nt\"):\n        assert os.access(\"foo\", 
os.W_OK)\n    else:\n        assert not os.access(\"foo\", os.W_OK)\n\n\ndef test_escape_gitignore_entries(tmp_dir, scm, dvc):\n    fname = \"file!with*weird#naming_[1].t?t\"\n    ignored_fname = r\"/file\\!with\\*weird\\#naming_\\[1\\].t\\?t\"\n\n    if os.name == \"nt\":\n        # Some characters are not supported by Windows in the filename\n        # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file\n        fname = \"file!with_weird#naming_[1].txt\"\n        ignored_fname = r\"/file\\!with_weird\\#naming_\\[1\\].txt\"\n\n    tmp_dir.dvc_gen(fname, \"...\")\n    assert ignored_fname in get_gitignore_content()\n\n\ndef test_add_from_data_dir(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"dir\": {\"file1\": \"file1 content\"}})\n\n    tmp_dir.gen({\"dir\": {\"file2\": \"file2 content\"}})\n\n    dvc.add(os.path.join(\"dir\", \"file2\"))\n\n\ndef test_add_parent_dir(tmp_dir, scm, dvc):\n    tmp_dir.gen({\"dir\": {\"file1\": \"file1 content\"}})\n    out_path = os.path.join(\"dir\", \"file1\")\n    dvc.add(out_path)\n\n    with pytest.raises(OverlappingOutputPathsError) as e:\n        dvc.add(\"dir\")\n    assert str(e.value) == (\n        \"Cannot add 'dir', because it is overlapping with other DVC \"\n        \"tracked output: '{out}'.\\n\"\n        \"To include '{out}' in 'dir', run 'dvc remove {out}.dvc' \"\n        \"and then 'dvc add dir'\"\n    ).format(out=os.path.join(\"dir\", \"file1\"))\n\n\ndef test_not_raises_on_re_add(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"file\", \"file content\")\n\n    tmp_dir.gen({\"file2\": \"file2 content\", \"file\": \"modified file\"})\n    dvc.add([\"file2\", \"file\"])\n\n\n@pytest.mark.parametrize(\"link\", [\"hardlink\", \"symlink\", \"copy\"])\ndef test_add_empty_files(tmp_dir, dvc, link):\n    file = \"foo\"\n    dvc.cache.local.cache_types = [link]\n    stages = tmp_dir.dvc_gen(file, \"\")\n\n    assert (tmp_dir / file).exists()\n    assert (tmp_dir / (file + DVC_FILE_SUFFIX)).exists()\n    assert 
os.path.exists(stages[0].outs[0].cache_path)\n\n\ndef test_add_optimization_for_hardlink_on_empty_files(tmp_dir, dvc, mocker):\n    dvc.cache.local.cache_types = [\"hardlink\"]\n    tmp_dir.gen({\"foo\": \"\", \"bar\": \"\", \"lorem\": \"lorem\", \"ipsum\": \"ipsum\"})\n    m = mocker.spy(LocalFileSystem, \"is_hardlink\")\n    stages = dvc.add([\"foo\", \"bar\", \"lorem\", \"ipsum\"])\n\n    assert m.call_count == 8\n    assert m.call_args != mocker.call(tmp_dir / \"foo\")\n    assert m.call_args != mocker.call(tmp_dir / \"bar\")\n\n    for stage in stages[:2]:\n        # hardlinks are not created for empty files\n        assert not system.is_hardlink(stage.outs[0].fs_path)\n\n    for stage in stages[2:]:\n        assert system.is_hardlink(stage.outs[0].fs_path)\n\n    for stage in stages:\n        assert os.path.exists(stage.path)\n        assert os.path.exists(stage.outs[0].cache_path)\n\n\ndef test_try_adding_pipeline_tracked_output(tmp_dir, dvc, run_copy):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    msg = (\n        \"cannot update 'bar': overlaps with an output of stage: 'copy-foo-bar' in \"\n        \"'dvc.yaml'.\\nRun the pipeline or use 'dvc commit' to force update it.\"\n    )\n    with pytest.raises(DvcException, match=msg):\n        dvc.add(\"bar\")\n\n\ndef test_try_adding_multiple_overlaps(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvcyaml_content = {\n        \"stages\": {\n            \"echo-foo\": {\n                \"cmd\": \"echo foo > foo\",\n                \"outs\": [\"foo\"],\n            }\n        }\n    }\n    dump_yaml(\"dvc.yaml\", dvcyaml_content)\n    msg = (\n        \"\\nUse `dvc remove` with any of the above targets to stop tracking the \"\n        \"overlapping output.\"\n    )\n    with pytest.raises(OutputDuplicationError, match=msg):\n        dvc.add(\"foo\")\n\n\ndef test_add_pipeline_file(tmp_dir, dvc, run_copy):\n    from dvc.dvcfile import PROJECT_FILE\n\n 
   tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    with pytest.raises(OutputIsStageFileError):\n        dvc.add(PROJECT_FILE)\n\n\ndef test_add_symlink_file(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"bar\": \"bar\"}})\n\n    (tmp_dir / \"dir\" / \"foo\").symlink_to(os.path.join(\".\", \"bar\"))\n\n    dvc.add(os.path.join(\"dir\", \"foo\"))\n\n    assert not (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / \"dir\" / \"foo.dvc\").exists()\n    assert not (tmp_dir / \"dir\" / \"foo\").is_symlink()\n    assert not (tmp_dir / \"dir\" / \"bar\").is_symlink()\n    assert (tmp_dir / \"dir\" / \"foo\").read_text() == \"bar\"\n    assert (tmp_dir / \"dir\" / \"bar\").read_text() == \"bar\"\n\n    assert (\n        tmp_dir\n        / \".dvc\"\n        / \"cache\"\n        / \"files\"\n        / \"md5\"\n        / \"37\"\n        / \"b51d194a7513e45b56f6524f2d51f2\"\n    ).read_text() == \"bar\"\n    assert not (\n        tmp_dir\n        / \".dvc\"\n        / \"cache\"\n        / \"files\"\n        / \"md5\"\n        / \"37\"\n        / \"b51d194a7513e45b56f6524f2d51f2\"\n    ).is_symlink()\n\n    # Test that subsequent add succeeds\n    # See https://github.com/treeverse/dvc/issues/4654\n    dvc.add(os.path.join(\"dir\", \"foo\"))\n\n\ndef test_add_symlink_dir(make_tmp_dir, tmp_dir, dvc):\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\"}})\n    target = os.path.join(\".\", \"data\")\n\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\"}})\n\n    (tmp_dir / \"dir\").symlink_to(target)\n\n    msg = \"Cannot add files inside symlinked directories to DVC\"\n    with pytest.raises(DvcException, match=msg):\n        dvc.add(\"dir\")\n\n\ndef test_add_file_in_symlink_dir(make_tmp_dir, tmp_dir, dvc):\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\"}})\n    target = os.path.join(\".\", \"data\")\n\n    (tmp_dir / \"dir\").symlink_to(target)\n\n    msg = \"Cannot add files inside symlinked directories to DVC\"\n    with 
pytest.raises(DvcException, match=msg):\n        dvc.add(os.path.join(\"dir\", \"foo\"))\n\n\ndef test_add_with_cache_link_error(tmp_dir, dvc, mocker, capsys):\n    tmp_dir.gen(\"foo\", \"foo\")\n\n    dvc.cache.local.cache_types = [\"symlink\", \"hardlink\"]\n    mocker.patch(\"dvc_data.hashfile.checkout.test_links\", return_value=[])\n    dvc.add(\"foo\")\n    err = capsys.readouterr()[1]\n    assert \"reconfigure cache types\" in err\n\n    assert (tmp_dir / \"foo\").exists()\n    assert (tmp_dir / \"foo.dvc\").exists()\n    assert (\n        tmp_dir\n        / \".dvc\"\n        / \"cache\"\n        / \"files\"\n        / \"md5\"\n        / \"ac\"\n        / \"bd18db4cc2f85cedef654fccc4a4d8\"\n    ).read_text() == \"foo\"\n\n\ndef test_add_preserve_fields(tmp_dir, dvc):\n    text = textwrap.dedent(\n        \"\"\"\\\n        # top comment\n        desc: top desc\n        outs:\n        - path: foo # out comment\n          desc: out desc\n          type: mytype\n          labels:\n          - label1\n          - label2\n          remote: testremote\n        meta: some metadata\n    \"\"\"\n    )\n    tmp_dir.gen(\"foo.dvc\", text)\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert (tmp_dir / \"foo.dvc\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        # top comment\n        desc: top desc\n        outs:\n        - path: foo # out comment\n          desc: out desc\n          type: mytype\n          labels:\n          - label1\n          - label2\n          remote: testremote\n          md5: acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n          hash: md5\n        meta: some metadata\n    \"\"\"\n    )\n\n\n# NOTE: unless long paths are enabled on Windows, PATH_MAX and NAME_MAX\n# are the same 260 chars, which makes the test unnecessarily complex\n@pytest.mark.skipif(os.name == \"nt\", reason=\"unsupported on Windows\")\ndef test_add_long_fname(tmp_dir, dvc):\n    name_max = os.pathconf(tmp_dir, \"PC_NAME_MAX\")\n    name = \"a\" * name_max\n    
tmp_dir.gen({\"data\": {name: \"foo\"}})\n\n    # nothing we can do in this case, as the resulting dvcfile\n    # will definitely exceed NAME_MAX\n    with pytest.raises(OSError, match=f\"File name too long: .*{name}\") as info:\n        dvc.add(os.path.join(\"data\", name))\n    assert info.value.errno == errno.ENAMETOOLONG\n\n    dvc.add(\"data\")\n    assert (tmp_dir / \"data\").read_text() == {name: \"foo\"}\n\n\ndef test_add_to_remote_absolute(tmp_dir, make_tmp_dir, dvc, remote):\n    tmp_abs_dir = make_tmp_dir(\"abs\")\n    tmp_foo = tmp_abs_dir / \"foo\"\n    tmp_foo.write_text(\"foo\")\n\n    dvc.add(str(tmp_foo), to_remote=True)\n    tmp_foo.unlink()\n\n    foo = tmp_dir / \"foo\"\n    assert foo.with_suffix(\".dvc\").exists()\n    assert not os.path.exists(tmp_foo)\n\n    dvc.pull(\"foo\")\n    assert not os.path.exists(tmp_foo)\n    assert foo.read_text() == \"foo\"\n\n    tmp_bar = tmp_abs_dir / \"bar\"\n    with pytest.raises(StageExternalOutputsError):\n        dvc.add(str(tmp_foo), out=str(tmp_bar), to_remote=True)\n\n\ndef test_add_to_cache_dir(tmp_dir, dvc, local_cloud):\n    local_cloud.gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    (stage,) = dvc.add(str(local_cloud / \"data\"), out=\"data\")\n    assert len(stage.deps) == 0\n    assert len(stage.outs) == 1\n    assert stage.outs[0].meta.size == len(\"foo\") + len(\"bar\")\n    assert stage.outs[0].meta.nfiles == 2\n\n    data = tmp_dir / \"data\"\n    assert data.read_text() == {\"foo\": \"foo\", \"bar\": \"bar\"}\n    assert (tmp_dir / \"data.dvc\").exists()\n\n    shutil.rmtree(data)\n    status = dvc.checkout(str(data))\n    assert status[\"added\"] == [\"data\" + os.sep]\n    assert data.read_text() == {\"foo\": \"foo\", \"bar\": \"bar\"}\n\n\ndef test_add_to_cache_file(tmp_dir, dvc, local_cloud):\n    local_cloud.gen(\"foo\", \"foo\")\n\n    (stage,) = dvc.add(str(local_cloud / \"foo\"), out=\"foo\")\n    assert len(stage.deps) == 0\n    assert len(stage.outs) == 1\n\n    foo 
= tmp_dir / \"foo\"\n    assert foo.read_text() == \"foo\"\n    assert (tmp_dir / \"foo.dvc\").exists()\n\n    foo.unlink()\n    status = dvc.checkout(str(foo))\n    assert status[\"added\"] == [\"foo\"]\n    assert foo.read_text() == \"foo\"\n\n\ndef test_add_with_out(tmp_dir, scm, dvc):\n    tmp_dir.gen({\"foo\": \"foo\"})\n    dvc.add(\"foo\", out=\"out_foo\")\n    gitignore_content = get_gitignore_content()\n    assert \"/out_foo\" in gitignore_content\n\n\ndef test_add_to_cache_different_name(tmp_dir, dvc, local_cloud):\n    local_cloud.gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    dvc.add(str(local_cloud / \"data\"), out=\"not_data\")\n\n    not_data = tmp_dir / \"not_data\"\n    assert not_data.read_text() == {\"foo\": \"foo\", \"bar\": \"bar\"}\n    assert (tmp_dir / \"not_data.dvc\").exists()\n\n    assert not (tmp_dir / \"data\").exists()\n    assert not (tmp_dir / \"data.dvc\").exists()\n\n    shutil.rmtree(not_data)\n    dvc.checkout(str(not_data))\n    assert not_data.read_text() == {\"foo\": \"foo\", \"bar\": \"bar\"}\n    assert not (tmp_dir / \"data\").exists()\n\n\ndef test_add_to_cache_not_exists(tmp_dir, dvc, local_cloud):\n    local_cloud.gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    dest_dir = tmp_dir / \"dir\" / \"that\" / \"does\" / \"not\" / \"exist\"\n    with pytest.raises(StagePathNotFoundError):\n        dvc.add(str(local_cloud / \"data\"), out=str(dest_dir))\n\n    dest_dir.parent.mkdir(parents=True)\n    dvc.add(str(local_cloud / \"data\"), out=str(dest_dir))\n\n    assert dest_dir.read_text() == {\"foo\": \"foo\", \"bar\": \"bar\"}\n    assert dest_dir.with_suffix(\".dvc\").exists()\n\n\ndef test_add_to_cache_from_remote(tmp_dir, dvc, workspace):\n    workspace.gen(\"foo\", \"foo\")\n\n    url = \"remote://workspace/foo\"\n    dvc.add(url, out=\"foo\")\n\n    foo = tmp_dir / \"foo\"\n    assert foo.read_text() == \"foo\"\n    assert (tmp_dir / \"foo.dvc\").exists()\n\n    # Change the contents of the 
remote location, in order to\n    # ensure it retrieves file from the cache and not re-fetches it\n    (workspace / \"foo\").write_text(\"bar\")\n\n    foo.unlink()\n    dvc.checkout(str(foo))\n    assert foo.read_text() == \"foo\"\n\n\ndef test_add_ignored(tmp_dir, scm, dvc):\n    from dvc.dvcfile import FileIsGitIgnored\n\n    tmp_dir.gen({\"dir\": {\"subdir\": {\"file\": \"content\"}}, \".gitignore\": \"dir/\"})\n    with pytest.raises(FileIsGitIgnored) as exc:\n        dvc.add(targets=[os.path.join(\"dir\", \"subdir\")])\n    assert str(exc.value) == (\"bad DVC file name '{}' is git-ignored.\").format(\n        os.path.join(\"dir\", \"subdir.dvc\")\n    )\n\n\ndef test_add_on_not_existing_file_should_not_remove_stage_file(tmp_dir, dvc):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    (tmp_dir / \"foo\").unlink()\n    dvcfile_contents = (tmp_dir / stage.path).read_text()\n\n    with pytest.raises(OutputDoesNotExistError):\n        dvc.add(\"foo\")\n    assert (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / stage.path).read_text() == dvcfile_contents\n\n\n@pytest.mark.parametrize(\n    \"target\",\n    [\n        \"dvc.repo.index.Index.check_graph\",\n        \"dvc.stage.Stage.add_outs\",\n    ],\n)\ndef test_add_does_not_remove_stage_file_on_failure(tmp_dir, dvc, mocker, target):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.gen(\"foo\", \"foobar\")  # update file\n    dvcfile_contents = (tmp_dir / stage.path).read_text()\n\n    exc_msg = f\"raising error from mocked '{target}'\"\n    mocker.patch(target, side_effect=DvcException(exc_msg))\n\n    with pytest.raises(DvcException, match=exc_msg):\n        dvc.add(\"foo\")\n    assert (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / stage.path).read_text() == dvcfile_contents\n\n\ndef test_add_updates_to_cloud_versioning_dir(tmp_dir, dvc):\n    data_dvc = tmp_dir / \"data.dvc\"\n    data_dvc.dump(\n        {\n            \"outs\": [\n                {\n                    
\"path\": \"data\",\n                    \"hash\": \"md5\",\n                    \"files\": [\n                        {\n                            \"size\": 3,\n                            \"version_id\": \"WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h\",\n                            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"relpath\": \"bar\",\n                        },\n                        {\n                            \"size\": 3,\n                            \"version_id\": \"0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0\",\n                            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"relpath\": \"foo\",\n                        },\n                    ],\n                }\n            ]\n        }\n    )\n\n    data = tmp_dir / \"data\"\n    data.mkdir()\n    (data / \"foo\").write_text(\"foo\")\n    (data / \"bar\").write_text(\"bar2\")\n\n    dvc.add(\"data\")\n\n    assert (tmp_dir / \"data.dvc\").parse() == {\n        \"outs\": [\n            {\n                \"path\": \"data\",\n                \"hash\": \"md5\",\n                \"files\": [\n                    {\n                        \"size\": 4,\n                        \"md5\": \"224e2539f52203eb33728acd228b4432\",\n                        \"relpath\": \"bar\",\n                    },\n                    {\n                        \"size\": 3,\n                        \"version_id\": \"0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0\",\n                        \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                        \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                        \"relpath\": \"foo\",\n                    },\n                ],\n            }\n        ]\n    }\n"
  },
  {
    "path": "tests/func/test_analytics.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc import __version__, env\nfrom dvc.analytics import _scm_in_use, collect_and_send_report\nfrom dvc.cli import main\nfrom dvc.repo import Repo\nfrom tests.utils import ANY\n\n\ndef test_daemon_analytics(mocker, tmp_path):\n    mock_send = mocker.patch(\"dvc.analytics.send\")\n    report = os.fspath(tmp_path)\n    assert main([\"daemon\", \"analytics\", report]) == 0\n\n    mock_send.assert_called_with(report)\n\n\ndef test_main_analytics(mocker, tmp_dir, dvc):\n    mock_is_enabled = mocker.patch(\"dvc.analytics.collect_and_send_report\")\n    mock_report = mocker.patch(\"dvc.analytics.is_enabled\", return_value=True)\n    tmp_dir.gen(\"foo\", \"text\")\n    assert main([\"add\", \"foo\"]) == 0\n    assert mock_is_enabled.called\n    assert mock_report.called\n\n\n@pytest.fixture\ndef mock_daemon(mocker):\n    def func(argv):\n        return main([\"daemon\", *argv])\n\n    return mocker.patch(\"dvc.daemon.daemon\", mocker.MagicMock(side_effect=func))\n\n\ndef test_collect_and_send_report(monkeypatch, mocker, dvc, mock_daemon):\n    monkeypatch.delenv(env.DVC_ANALYTICS_ENDPOINT, raising=False)\n    mock_post = mocker.patch(\"requests.post\")\n    collect_and_send_report()\n\n    assert mock_daemon.call_count == 1\n    assert mock_post.call_count == 1\n    assert mock_post.call_args == mocker.call(\n        \"https://analytics.dvc.org\",\n        json={\n            \"dvc_version\": __version__,\n            \"scm_class\": type(dvc.scm).__name__,\n            \"is_binary\": False,\n            \"system_info\": ANY(dict),\n            \"user_id\": ANY(str),\n            \"group_id\": mocker.ANY,\n            \"remotes\": ANY(list),\n            \"git_remote_hash\": None,\n        },\n        headers={\"content-type\": \"application/json\"},\n        timeout=5,\n    )\n\n\ndef test_scm_dvc_only(tmp_dir, dvc):\n    scm = _scm_in_use(dvc.scm)\n    assert scm == \"NoSCM\"\n\n\ndef test_scm_git(tmp_dir, scm, dvc):\n    
scm = _scm_in_use(scm)\n    assert scm == \"Git\"\n\n\ndef test_scm_subrepo(tmp_dir, scm):\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n\n    with subdir.chdir():\n        repo = Repo.init(subdir=True)\n        scm = _scm_in_use(repo.scm)\n\n    assert scm == \"Git\"\n"
  },
  {
    "path": "tests/func/test_check_ignore.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.ignore import DvcIgnore\n\n\n@pytest.mark.parametrize(\n    \"file,ret,output\", [(\"ignored\", 0, True), (\"not_ignored\", 1, False)]\n)\ndef test_check_ignore(tmp_dir, dvc, file, ret, output, caplog, capsys):\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"ignored\")\n\n    assert main([\"check-ignore\", file]) == ret\n\n    out, _ = capsys.readouterr()\n    assert (file in out) is output\n    assert \"Having any troubles?\" not in caplog.text\n\n\n@pytest.mark.parametrize(\n    \"file,ret,output\",\n    [\n        (\"file\", 0, f\"{DvcIgnore.DVCIGNORE_FILE}:1:f*\\tfile\\n\"),\n        (\"foo\", 0, f\"{DvcIgnore.DVCIGNORE_FILE}:2:!foo\\tfoo\\n\"),\n        (\n            os.path.join(\"dir\", \"foobar\"),\n            0,\n            \"{}:1:foobar\\t{}\\n\".format(\n                os.path.join(\"dir\", DvcIgnore.DVCIGNORE_FILE),\n                os.path.join(\"dir\", \"foobar\"),\n            ),\n        ),\n    ],\n)\ndef test_check_ignore_details(tmp_dir, dvc, file, ret, output, capsys):\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"f*\\n!foo\")\n    tmp_dir.gen({\"dir\": {DvcIgnore.DVCIGNORE_FILE: \"foobar\"}})\n\n    assert main([\"check-ignore\", \"-d\", file]) == ret\n    assert (output, \"\") == capsys.readouterr()\n\n\n@pytest.mark.parametrize(\"non_matching\", [True, False])\ndef test_check_ignore_non_matching(tmp_dir, dvc, non_matching, caplog, capsys):\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"other\")\n    if non_matching:\n        assert main([\"check-ignore\", \"-d\", \"-n\", \"file\"]) == 1\n    else:\n        assert main([\"check-ignore\", \"-d\", \"file\"]) == 1\n\n    out, _ = capsys.readouterr()\n    assert (\"::\\tfile\\n\" in out) is non_matching\n\n\n@pytest.mark.parametrize(\n    \"args\",\n    [\n        [\"-n\", \"file\"],\n        [\"-a\", \"file\"],\n        [\"-q\", \"-d\", \"file\"],\n        [\"--stdin\", \"file\"],\n        [],\n    ],\n)\ndef 
test_check_ignore_error_args_cases(tmp_dir, dvc, args, caplog):\n    assert main([\"check-ignore\", *args]) == 255\n    assert \"Having any troubles?\" not in caplog.text\n\n\n@pytest.mark.parametrize(\"path,ret\", [({\"dir\": {}}, 0), ({\"dir\": \"files\"}, 1)])\ndef test_check_ignore_dir(tmp_dir, dvc, path, ret):\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"dir/\")\n    tmp_dir.gen(path)\n\n    assert main([\"check-ignore\", \"-q\", \"dir\"]) == ret\n\n\ndef test_check_ignore_default_dir(tmp_dir, dvc):\n    assert main([\"check-ignore\", \"-q\", \".dvc\"]) == 0\n\n\ndef test_check_ignore_out_side_repo(tmp_dir, dvc):\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"file\")\n    assert main([\"check-ignore\", \"-q\", \"../file\"]) == 1\n\n\ndef test_check_ignore_sub_repo(tmp_dir, dvc, capsys):\n    tmp_dir.gen({DvcIgnore.DVCIGNORE_FILE: \"other\", \"dir\": {\".dvc\": {}, \"foo\": \"bar\"}})\n\n    assert main([\"check-ignore\", \"-d\", os.path.join(\"dir\", \"foo\")]) == 0\n    out, _ = capsys.readouterr()\n    assert \"in sub_repo:{}\\t{}\".format(\"dir\", os.path.join(\"dir\", \"foo\")) in out\n\n\ndef test_check_sub_dir_ignore_file(tmp_dir, dvc, capsys):\n    tmp_dir.gen(\n        {\n            DvcIgnore.DVCIGNORE_FILE: \"other\",\n            \"dir\": {DvcIgnore.DVCIGNORE_FILE: \"bar\\nfoo\", \"foo\": \"bar\"},\n        }\n    )\n\n    assert main([\"check-ignore\", \"-d\", os.path.join(\"dir\", \"foo\")]) == 0\n\n    out, _ = capsys.readouterr()\n    assert (\n        \"{}:2:foo\\t{}\".format(\n            os.path.join(\"dir\", DvcIgnore.DVCIGNORE_FILE),\n            os.path.join(\"dir\", \"foo\"),\n        )\n        in out\n    )\n\n    sub_dir = tmp_dir / \"dir\"\n    with sub_dir.chdir():\n        assert main([\"check-ignore\", \"-d\", \"foo\"]) == 0\n        out, _ = capsys.readouterr()\n        assert \".dvcignore:2:foo\\tfoo\" in out\n\n\ndef test_check_ignore_details_all(tmp_dir, dvc, capsys):\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"f*\\n!foo\")\n\n  
  assert main([\"check-ignore\", \"-d\", \"-a\", \"foo\"]) == 0\n    out, _ = capsys.readouterr()\n    assert f\"{DvcIgnore.DVCIGNORE_FILE}:2:!foo\\tfoo\\n\" in out\n\n\n@pytest.mark.parametrize(\n    \"file,ret,output\", [(\"ignored\", 0, True), (\"not_ignored\", 1, False)]\n)\ndef test_check_ignore_stdin_mode(tmp_dir, dvc, file, ret, output, capsys, mocker):\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"ignored\")\n    mocker.patch(\"builtins.input\", side_effect=[file, \"\"])\n\n    assert main([\"check-ignore\", \"--stdin\"]) == ret\n    out, _ = capsys.readouterr()\n    assert (file in out) is output\n"
  },
  {
    "path": "tests/func/test_checkout.py",
    "content": "import logging\nimport os\nimport shutil\nimport stat\nimport textwrap\n\nimport pytest\nfrom dulwich.porcelain import remove as git_rm\n\nfrom dvc.cli import main\nfrom dvc.dvcfile import PROJECT_FILE, FileMixin, SingleStageFile, load_file\nfrom dvc.exceptions import CheckoutError, CheckoutErrorSuggestGit, NoOutputOrStageError\nfrom dvc.fs import system\nfrom dvc.stage.exceptions import StageFileDoesNotExistError\nfrom dvc.utils import relpath\nfrom dvc.utils.fs import remove\nfrom tests.utils import get_gitignore_content\n\nlogger = logging.getLogger(\"dvc\")\n\n\ndef walk_files(directory):\n    for root, _, files in os.walk(directory):\n        for f in files:\n            yield os.path.join(root, f)\n\n\ndef test_checkout(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"data\": {\"file\": \"file\"}})\n    dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy-foo-file1\",\n    )\n    remove(tmp_dir / \"foo\")\n    remove(\"data\")\n\n    assert dvc.checkout(force=True) == empty_checkout | {\n        \"added\": [\"data\" + os.sep, \"foo\"],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"data\").read_text() == {\"file\": \"file\"}\n\n\ndef test_checkout_cli(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"data\": {\"file\": \"file\"}})\n    dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy-foo-file1\",\n    )\n    remove(tmp_dir / \"foo\")\n    remove(\"data\")\n\n    assert main([\"checkout\", \"--force\"]) == 0\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"data\").read_text() == {\"file\": \"file\"}\n\n    remove(tmp_dir / \"foo\")\n    remove(\"data\")\n\n    assert main([\"checkout\", \"--force\", \"foo.dvc\"]) 
== 0\n    assert main([\"checkout\", \"--force\", \"data.dvc\"]) == 0\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"data\").read_text() == {\"file\": \"file\"}\n\n\n@pytest.mark.parametrize(\n    \"summary,expected_lines\",\n    [\n        (\n            True,\n            [\n                \"1 file modified, 3 files added and 1 file deleted\",\n            ],\n        ),\n        (\n            False,\n            [\n                \"M\\tbar\".expandtabs(),\n                \"A\\tdir\".expandtabs() + os.sep,\n                \"A\\tfoo\".expandtabs(),\n                \"A\\tlorem\".expandtabs(),\n                \"D\\tipsum\".expandtabs(),\n            ],\n        ),\n    ],\n)\ndef test_checkout_stats(tmp_dir, dvc, capsys, summary, expected_lines):\n    tmp_dir.dvc_gen(\n        {\n            \"foo\": \"foo\",\n            \"bar\": \"bar\",\n            \"lorem\": \"lorem\",\n            \"dir\": {\"file\": \"file\"},\n            \"ipsum\": \"ipsum\",\n            \"dolor\": \"dolor\",\n        }\n    )\n    for out in [\"foo\", \"bar\", \"lorem\", \"dir\"]:\n        remove(tmp_dir / out)\n    (tmp_dir / \"ipsum.dvc\").unlink()\n    (tmp_dir / \"bar\").write_text(\"foobar\")\n\n    opts = [\"--summary\"] if summary else []\n    assert main([\"checkout\", \"--force\", *opts]) == 0\n    out, _ = capsys.readouterr()\n    assert out.splitlines() == expected_lines\n\n    main([\"checkout\"])\n    out, _ = capsys.readouterr()\n    assert not out\n\n\ndef test_remove_files_when_checkout(tmp_dir, dvc, scm):\n    # add the file into a separate branch\n    scm.checkout(\"branch\", True)\n    ret = main([\"checkout\", \"--force\"])\n    assert ret == 0\n    tmp_dir.dvc_gen(\"file_in_a_branch\", \"random text\", commit=\"add file\")\n\n    # Checkout back in master\n    scm.checkout(\"master\")\n    assert os.path.exists(\"file_in_a_branch\")\n\n    # Make sure `dvc checkout` removes the file\n    # self.dvc.checkout()\n    ret = 
main([\"checkout\", \"--force\"])\n    assert ret == 0\n    assert not os.path.exists(\"file_in_a_branch\")\n\n\nclass TestCheckoutCleanWorkingDir:\n    def test(self, mocker, tmp_dir, dvc):\n        (stage,) = tmp_dir.dvc_gen(\"data\", {\"foo\": \"foo\"})\n\n        # change working directory\n        (tmp_dir / \"data\").gen(\"not_cached.txt\", \"not_cached\")\n        assert main([\"checkout\", stage.relpath, \"--force\"]) == 0\n        assert not (tmp_dir / \"data\" / \"not_cached.txt\").exists()\n\n    def test_force(self, mocker, tmp_dir, dvc):\n        (stage,) = tmp_dir.dvc_gen(\"data\", {\"foo\": \"foo\"})\n\n        # change working directory\n        (tmp_dir / \"data\").gen(\"not_cached.txt\", \"not_cached\")\n        assert main([\"checkout\", stage.relpath]) != 0\n        assert (tmp_dir / \"data\" / \"not_cached.txt\").exists()\n\n\ndef test_checkout_selective_remove(tmp_dir, dvc):\n    # Use copy to test for changes in the inodes\n    dvc.cache.local.cache_types = [\"copy\"]\n    tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    foo_inode = system.inode(os.path.join(\"data\", \"foo\"))\n    bar_inode = system.inode(os.path.join(\"data\", \"bar\"))\n    # move instead of remove, to lock inode assigned to stage_files[0].path\n    # if we were to use remove, we might end up with same inode assigned to\n    # newly checked out file\n    shutil.move(os.path.join(\"data\", \"foo\"), \"random_name\")\n\n    assert main([\"checkout\", \"--force\", \"data.dvc\"]) == 0\n    assert (tmp_dir / \"data\").read_text() == {\"foo\": \"foo\", \"bar\": \"bar\"}\n    assert system.inode(os.path.join(\"data\", \"foo\")) != foo_inode\n    assert system.inode(os.path.join(\"data\", \"bar\")) == bar_inode\n\n\ndef test_gitignore_basic(tmp_dir, dvc, scm):\n    tmp_dir.gen(\"foo\", \"foo\")\n    assert not os.path.exists(scm.GITIGNORE)\n\n    tmp_dir.dvc_gen(\"file1\", \"random text1\", commit=\"add file1\")\n    tmp_dir.dvc_gen(\"file2\", \"random 
text2\", commit=\"add file2\")\n    dvc.run(\n        cmd=\"cp foo file3\",\n        deps=[\"foo\"],\n        outs_no_cache=[\"file3\"],\n        name=\"cp-foo-file3\",\n    )\n    assert get_gitignore_content() == [\"/file1\", \"/file2\"]\n\n\ndef test_gitignore_when_checkout(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\"file_in_a_master\", \"master\", commit=\"master\")\n\n    scm.checkout(\"branch\", True)\n    ret = main([\"checkout\", \"--force\"])\n    assert ret == 0\n    tmp_dir.dvc_gen(\"file_in_a_branch\", \"branch\", commit=\"branch\")\n\n    scm.checkout(\"master\")\n    ret = main([\"checkout\", \"--force\"])\n    assert ret == 0\n\n    ignored = get_gitignore_content()\n\n    assert len(ignored) == 1\n    assert \"/file_in_a_master\" in ignored\n\n    scm.checkout(\"branch\")\n    ret = main([\"checkout\", \"--force\"])\n    assert ret == 0\n    ignored = get_gitignore_content()\n    assert \"/file_in_a_branch\" in ignored\n\n\ndef test_checkout_missing_md5_in_lock_file_for_outs_deps(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"data\": {\"file\": \"file\"}})\n    dvc.stage.add(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy-file\",\n    )\n\n    with pytest.raises(CheckoutError) as exc:\n        dvc.checkout(force=True)\n    assert exc.value.result == empty_checkout | {\"failed\": [\"file1\"]}\n\n\ndef test_checkout_empty_dir(tmp_dir, dvc):\n    empty_dir = tmp_dir / \"empty_dir\"\n    empty_dir.mkdir()\n    (stage,) = dvc.add(\"empty_dir\")\n\n    stage.outs[0].remove()\n    assert not empty_dir.exists()\n\n    assert dvc.checkout(force=True) == empty_checkout | {\n        \"added\": [os.path.join(\"empty_dir\", \"\")],\n        \"stats\": empty_stats | {\"added\": 0},\n    }\n    assert empty_dir.is_dir()\n    assert not list(empty_dir.iterdir())\n\n\ndef test_checkout_not_cached_file(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    
dvc.run(cmd=\"cp foo bar\", deps=[\"foo\"], outs_no_cache=[\"bar\"], name=\"copy-file\")\n    assert dvc.checkout(force=True) == empty_checkout\n\n\ndef test_checkout_with_deps_cli(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"data\": {\"file\": \"file\"}})\n    dvc.run(\n        outs=[\"file1\"],\n        deps=[\"foo\", \"copy.py\"],\n        cmd=\"python copy.py foo file1\",\n        name=\"copy-file\",\n    )\n    remove(\"foo\")\n    remove(\"file1\")\n\n    assert not os.path.exists(\"foo\")\n    assert not os.path.exists(\"file1\")\n\n    ret = main([\"checkout\", \"--force\", \"copy-file\", \"--with-deps\"])\n    assert ret == 0\n\n    assert os.path.exists(\"foo\")\n    assert os.path.exists(\"file1\")\n\n\ndef test_checkout_directory(tmp_dir, dvc):\n    (stage,) = tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    remove(\"data\")\n    assert not os.path.exists(\"data\")\n\n    ret = main([\"checkout\", stage.path])\n    assert ret == 0\n\n    assert os.path.exists(\"data\")\n\n\ndef test_checkout_suggest_git(tmp_dir, dvc, scm):\n    with pytest.raises(CheckoutErrorSuggestGit) as e:\n        dvc.checkout(targets=\"gitbranch\")\n    assert isinstance(e.value.__cause__, NoOutputOrStageError)\n    assert isinstance(e.value.__cause__.__cause__, StageFileDoesNotExistError)\n\n    with pytest.raises(CheckoutErrorSuggestGit) as e:\n        dvc.checkout(targets=\"foobar\")\n    assert isinstance(e.value.__cause__, NoOutputOrStageError)\n    assert isinstance(e.value.__cause__.__cause__, StageFileDoesNotExistError)\n\n    with pytest.raises(CheckoutErrorSuggestGit) as e:\n        dvc.checkout(targets=\"looks-like-dvcfile.dvc\")\n    assert isinstance(e.value.__cause__, StageFileDoesNotExistError)\n    assert e.value.__cause__.__cause__ is None\n\n\ndef test_checkout_target_recursive_should_not_remove_other_used_files(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\", \"data\": {\"file\": 
\"file\"}})\n    assert main([\"checkout\", \"-R\", \"data\"]) == 0\n    assert (tmp_dir / \"foo\").exists()\n    assert (tmp_dir / \"bar\").exists()\n\n\ndef test_checkout_recursive_not_directory(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n\n    assert dvc.checkout(targets=[\"foo.dvc\"], recursive=True) == empty_checkout\n\n\ndef test_checkout_moved_cache_dir_with_symlinks(tmp_dir, dvc):\n    tmp_dir.gen({\"foo\": \"foo\", \"data\": {\"file\": \"file\"}})\n    ret = main([\"config\", \"cache.type\", \"symlink\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n\n    ret = main([\"add\", \"data\"])\n    assert ret == 0\n\n    assert system.is_symlink(\"foo\")\n    old_foo_link = os.path.realpath(\"foo\")\n\n    assert system.is_symlink(os.path.join(\"data\", \"file\"))\n    old_data_link = os.path.realpath(os.path.join(\"data\", \"file\"))\n\n    old_cache_dir = str(tmp_dir / \".dvc\" / \"cache\")\n    new_cache_dir = str(tmp_dir / \".dvc\" / \"cache_new\")\n    os.rename(old_cache_dir, new_cache_dir)\n\n    ret = main([\"cache\", \"dir\", new_cache_dir])\n    assert ret == 0\n\n    ret = main([\"checkout\", \"-f\"])\n    assert ret == 0\n\n    assert system.is_symlink(\"foo\")\n    new_foo_link = os.path.realpath(\"foo\")\n\n    assert system.is_symlink(os.path.join(\"data\", \"file\"))\n    new_data_link = os.path.realpath(os.path.join(\"data\", \"file\"))\n\n    assert relpath(old_foo_link, old_cache_dir) == relpath(new_foo_link, new_cache_dir)\n\n    assert relpath(old_data_link, old_cache_dir) == relpath(\n        new_data_link, new_cache_dir\n    )\n\n\ndef test_checkout_no_checksum(tmp_dir, dvc):\n    tmp_dir.gen(\"file\", \"file content\")\n    dvc.run(outs=[\"file\"], no_exec=True, cmd=\"somecmd\", name=\"stage1\")\n\n    with pytest.raises(CheckoutError):\n        dvc.checkout([\"stage1\"], force=True)\n\n    assert not 
os.path.exists(\"file\")\n\n\n@pytest.mark.parametrize(\n    \"link, link_test_func\",\n    [(\"hardlink\", system.is_hardlink), (\"symlink\", system.is_symlink)],\n)\ndef test_checkout_relink(tmp_dir, dvc, link, link_test_func):\n    dvc.cache.local.cache_types = [link]\n\n    tmp_dir.dvc_gen({\"dir\": {\"data\": \"text\"}})\n\n    data_file = os.path.join(\"dir\", \"data\")\n\n    # NOTE: Windows symlink perms don't propagate to the target\n    if not (os.name == \"nt\" and link == \"symlink\"):\n        assert not os.access(data_file, os.W_OK)\n\n    dvc.unprotect(data_file)\n    assert os.access(data_file, os.W_OK)\n    assert not link_test_func(data_file)\n\n    assert dvc.checkout([\"dir.dvc\"], relink=True) == empty_checkout\n    assert link_test_func(data_file)\n\n    # NOTE: Windows symlink perms don't propagate to the target and\n    # hardlink was chmod-ed during relink to be deleted\n    if not (os.name == \"nt\" and link in [\"symlink\", \"hardlink\"]):\n        assert not os.access(data_file, os.W_OK)\n\n\n@pytest.mark.parametrize(\n    \"target\",\n    [os.path.join(\"dir\", \"subdir\"), os.path.join(\"dir\", \"subdir\", \"file\")],\n)\ndef test_partial_checkout(tmp_dir, dvc, target):\n    tmp_dir.dvc_gen({\"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"}})\n    shutil.rmtree(\"dir\")\n    assert dvc.checkout([target]) == empty_checkout | {\n        \"added\": [\"dir\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert list(walk_files(\"dir\")) == [os.path.join(\"dir\", \"subdir\", \"file\")]\n\n\nempty_stats = {\"added\": 0, \"deleted\": 0, \"modified\": 0}\nempty_checkout = {\"added\": [], \"deleted\": [], \"modified\": [], \"stats\": empty_stats}\n\n\ndef test_stats_on_empty_checkout(tmp_dir, dvc, scm):\n    assert dvc.checkout() == empty_checkout\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"}},\n        commit=\"initial\",\n    )\n    assert 
dvc.checkout() == empty_checkout\n\n\ndef test_stats_on_checkout(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\n        {\n            \"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"},\n            \"foo\": \"foo\",\n            \"bar\": \"bar\",\n        },\n        commit=\"initial\",\n    )\n    scm.checkout(\"HEAD~\")\n    stats = dvc.checkout()\n    assert stats == empty_checkout | {\n        \"deleted\": [\"dir\" + os.sep, \"bar\", \"foo\"],\n        \"stats\": empty_stats | {\"deleted\": 3},\n    }\n\n    scm.checkout(\"-\")\n    stats = dvc.checkout()\n    assert stats == empty_checkout | {\n        \"added\": [\"dir\" + os.sep, \"bar\", \"foo\"],\n        \"stats\": empty_stats | {\"added\": 4},\n    }\n\n    tmp_dir.gen({\"lorem\": \"lorem\", \"bar\": \"new bar\", \"dir2\": {\"file\": \"file\"}})\n    (tmp_dir / \"foo\").unlink()\n    git_rm(tmp_dir, [\"foo.dvc\"])\n    tmp_dir.dvc_add([\"bar\", \"lorem\", \"dir2\"], commit=\"second\")\n\n    scm.checkout(\"HEAD~\")\n    stats = dvc.checkout()\n    assert stats == empty_checkout | {\n        \"modified\": [\"bar\"],\n        \"added\": [\"foo\"],\n        \"deleted\": [\"dir2\" + os.sep, \"lorem\"],\n        \"stats\": {\"modified\": 1, \"added\": 1, \"deleted\": 2},\n    }\n\n    scm.checkout(\"-\")\n    stats = dvc.checkout()\n    assert stats == empty_checkout | {\n        \"modified\": [\"bar\"],\n        \"added\": [\"dir2\" + os.sep, \"lorem\"],\n        \"deleted\": [\"foo\"],\n        \"stats\": {\"modified\": 1, \"added\": 2, \"deleted\": 1},\n    }\n\n\ndef test_checkout_stats_on_failure(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\n        {\"foo\": \"foo\", \"dir\": {\"subdir\": {\"file\": \"file\"}}, \"other\": \"other\"},\n        commit=\"initial\",\n    )\n    stage = load_file(dvc, \"foo.dvc\").stage\n    tmp_dir.dvc_gen({\"foo\": \"foobar\", \"other\": \"other other\"}, commit=\"second\")\n\n    # remove object from cache\n    cache = stage.outs[0].cache_path\n    
remove(cache)\n\n    (tmp_dir / \"foo\").unlink()\n\n    scm.checkout(\"HEAD~\")\n    with pytest.raises(CheckoutError) as exc:\n        dvc.checkout(force=True)\n\n    assert exc.value.result == empty_checkout | {\n        \"failed\": [\"foo\"],\n        \"modified\": [\"other\"],\n        \"stats\": empty_stats | {\"modified\": 1},\n    }\n\n\ndef test_stats_on_added_file_from_tracked_dir(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"}},\n        commit=\"initial\",\n    )\n\n    tmp_dir.gen(\"dir/subdir/newfile\", \"newfile\")\n    tmp_dir.dvc_add(\"dir\", commit=\"add newfile\")\n    scm.checkout(\"HEAD~\")\n    assert dvc.checkout() == {\n        **empty_checkout,\n        \"modified\": [\"dir\" + os.sep],\n        \"stats\": empty_stats | {\"deleted\": 1},\n    }\n    assert dvc.checkout() == empty_checkout\n\n    scm.checkout(\"-\")\n    assert dvc.checkout() == {\n        **empty_checkout,\n        \"modified\": [\"dir\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert dvc.checkout() == empty_checkout\n\n\ndef test_stats_on_updated_file_from_tracked_dir(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"}},\n        commit=\"initial\",\n    )\n\n    tmp_dir.gen(\"dir/subdir/file\", \"what file?\")\n    tmp_dir.dvc_add(\"dir\", commit=\"update file\")\n    scm.checkout(\"HEAD~\")\n    assert dvc.checkout() == {\n        **empty_checkout,\n        \"modified\": [\"dir\" + os.sep],\n        \"stats\": empty_stats | {\"modified\": 1},\n    }\n    assert dvc.checkout() == empty_checkout\n\n    scm.checkout(\"-\")\n    assert dvc.checkout() == {\n        **empty_checkout,\n        \"modified\": [\"dir\" + os.sep],\n        \"stats\": empty_stats | {\"modified\": 1},\n    }\n    assert dvc.checkout() == empty_checkout\n\n\ndef test_stats_on_removed_file_from_tracked_dir(tmp_dir, dvc, scm):\n    
tmp_dir.dvc_gen(\n        {\"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"}},\n        commit=\"initial\",\n    )\n\n    (tmp_dir / \"dir\" / \"subdir\" / \"file\").unlink()\n    tmp_dir.dvc_add(\"dir\", commit=\"removed file from subdir\")\n    scm.checkout(\"HEAD~\")\n    assert dvc.checkout() == {\n        **empty_checkout,\n        \"modified\": [\"dir\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert dvc.checkout() == empty_checkout\n\n    scm.checkout(\"-\")\n    assert dvc.checkout() == {\n        **empty_checkout,\n        \"modified\": [\"dir\" + os.sep],\n        \"stats\": empty_stats | {\"deleted\": 2},\n    }\n    assert dvc.checkout() == empty_checkout\n\n\ndef test_stats_on_show_changes_does_not_show_summary(tmp_dir, dvc, scm, capsys):\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"subdir\": {\"file\": \"file\"}}, \"other\": \"other\"},\n        commit=\"initial\",\n    )\n    scm.checkout(\"HEAD~\")\n\n    assert main([\"checkout\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert out.splitlines() == [\n        f\"D\\tdir{os.sep}\".expandtabs(),\n        \"D\\tother\".expandtabs(),\n    ]\n\n\ndef test_stats_does_not_show_changes_by_default(tmp_dir, dvc, scm, capsys):\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"subdir\": {\"file\": \"file\"}}, \"other\": \"other\"},\n        commit=\"initial\",\n    )\n    scm.checkout(\"HEAD~\")\n\n    assert main([\"checkout\", \"--summary\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert out.rstrip() == \"2 files deleted\"\n\n\n@pytest.mark.parametrize(\"link\", [\"hardlink\", \"symlink\", \"copy\"])\ndef test_checkout_with_relink_existing(tmp_dir, dvc, link):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    (tmp_dir / \"foo\").unlink()\n\n    tmp_dir.dvc_gen(\"bar\", \"bar\")\n    dvc.cache.local.cache_types = [link]\n\n    stats = dvc.checkout(relink=True)\n    assert stats == {\n        **empty_checkout,\n        \"added\": [\"foo\"],\n        \"stats\": 
empty_stats | {\"added\": 1},\n    }\n\n\ndef test_checkout_with_deps(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\"})\n    dvc.run(cmd=\"echo foo > bar\", outs=[\"bar\"], deps=[\"foo\"], name=\"copy-file\")\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / \"foo\").unlink()\n\n    stats = dvc.checkout([\"copy-file\"], with_deps=False)\n    assert stats == {\n        **empty_checkout,\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    stats = dvc.checkout([\"copy-file\"], with_deps=True)\n    assert stats == {\n        **empty_checkout,\n        \"added\": [\"bar\", \"foo\"],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n\n\ndef test_checkout_recursive(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    dvc.add(\"dir/*\", glob=True)\n\n    (tmp_dir / \"dir\" / \"foo\").unlink()\n    (tmp_dir / \"dir\" / \"bar\").unlink()\n\n    stats = dvc.checkout([\"dir\"], recursive=True)\n    assert stats == empty_checkout | {\n        \"added\": [os.path.join(\"dir\", \"bar\"), os.path.join(\"dir\", \"foo\")],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n\n\ndef test_checkouts_with_different_addressing(tmp_dir, dvc, run_copy):\n    tmp_dir.gen({\"foo\": \"foo\", \"lorem\": \"lorem\"})\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    run_copy(\"lorem\", \"ipsum\", name=\"copy-lorem-ipsum\")\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / \"ipsum\").unlink()\n    assert dvc.checkout(PROJECT_FILE) == empty_checkout | {\n        \"added\": [\"bar\", \"ipsum\"],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / \"ipsum\").unlink()\n    assert dvc.checkout(\":\") == empty_checkout | {\n        \"added\": [\"bar\", \"ipsum\"],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    assert dvc.checkout(\"copy-foo-bar\") == 
empty_checkout | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    assert dvc.checkout(\"dvc.yaml:copy-foo-bar\") == empty_checkout | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    assert dvc.checkout(\":copy-foo-bar\") == empty_checkout | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / \"data\").mkdir()\n    with (tmp_dir / \"data\").chdir():\n        assert dvc.checkout(\n            relpath(tmp_dir / \"dvc.yaml\") + \":copy-foo-bar\"\n        ) == empty_checkout | {\n            \"added\": [relpath(tmp_dir / \"bar\")],\n            \"stats\": empty_stats | {\"added\": 1},\n        }\n\n    (tmp_dir / \"bar\").unlink()\n    assert dvc.checkout(\"bar\") == empty_checkout | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n\ndef test_checkouts_on_same_stage_name_and_output_name(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    run_copy(\"foo\", \"copy-foo-bar\", name=\"make_collision\")\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / \"copy-foo-bar\").unlink()\n    assert dvc.checkout(\"copy-foo-bar\") == empty_checkout | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert dvc.checkout(\"./copy-foo-bar\") == empty_checkout | {\n        \"added\": [\"copy-foo-bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n\ndef test_checkouts_for_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage1 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    tmp_dir.gen(\"lorem\", \"lorem\")\n    stage2 = run_copy(\"lorem\", \"ipsum\", name=\"copy-lorem-ipsum\")\n\n    for out in [\"bar\", \"ipsum\"]:\n 
       (tmp_dir / out).unlink()\n    assert dvc.checkout([\"bar\"]) == empty_checkout | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    assert dvc.checkout([PROJECT_FILE]) == empty_checkout | {\n        \"added\": [\"bar\", \"ipsum\"],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n\n    for out in [\"bar\", \"ipsum\"]:\n        (tmp_dir / out).unlink()\n    assert dvc.checkout([stage1.addressing]) == empty_checkout | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    (tmp_dir / \"bar\").unlink()\n    assert dvc.checkout([stage2.addressing]) == empty_checkout | {\n        \"added\": [\"ipsum\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    (tmp_dir / \"ipsum\").unlink()\n    assert dvc.checkout() == empty_checkout | {\n        \"added\": [\"bar\", \"ipsum\"],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n\n\ndef test_checkout_executable(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    contents = (tmp_dir / \"foo.dvc\").parse()\n    contents[\"outs\"][0][\"isexec\"] = True\n    (tmp_dir / \"foo.dvc\").dump(contents)\n\n    assert dvc.checkout(\"foo\") == empty_checkout | {\n        \"modified\": [\"foo\"],\n        \"stats\": empty_stats | {\"modified\": 1},\n    }\n\n    isexec = os.stat(\"foo\").st_mode & stat.S_IEXEC\n    if os.name == \"nt\":\n        # NOTE: you can't set exec bits on Windows\n        assert not isexec\n    else:\n        assert isexec\n\n\ndef test_checkout_partial(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\", \"sub_dir\": {\"baz\": \"baz\"}}})\n\n    data_dir = tmp_dir / \"data\"\n    shutil.rmtree(data_dir)\n\n    assert dvc.checkout(str(data_dir / \"foo\")) == empty_checkout | {\n        \"added\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert data_dir.read_text() == {\"foo\": 
\"foo\"}\n\n    assert dvc.checkout(str(data_dir / \"sub_dir\" / \"baz\")) == empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert data_dir.read_text() == {\"foo\": \"foo\", \"sub_dir\": {\"baz\": \"baz\"}}\n\n    assert dvc.checkout(str(data_dir / \"bar\")) == empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert data_dir.read_text() == {\n        \"foo\": \"foo\",\n        \"bar\": \"bar\",\n        \"sub_dir\": {\"baz\": \"baz\"},\n    }\n\n\ndef test_checkout_partial_unchanged(tmp_dir, dvc):\n    original_dir_shape = {\n        \"foo\": \"foo\",\n        \"bar\": \"bar\",\n        \"sub_dir\": {\"baz\": \"baz\"},\n        \"empty_sub_dir\": {},\n    }\n    tmp_dir.dvc_gen({\"data\": original_dir_shape})\n\n    data_dir = tmp_dir / \"data\"\n    sub_dir = data_dir / \"sub_dir\"\n    foo = data_dir / \"foo\"\n    bar = data_dir / \"bar\"\n    sub_dir_file = sub_dir / \"baz\"\n\n    # Nothing changed, nothing added/deleted/modified\n    assert dvc.checkout(str(bar)) == empty_checkout\n\n    # Irrelevant file changed, still nothing added/deleted/modified\n    foo.unlink()\n    assert dvc.checkout(str(bar)) == empty_checkout\n\n    # Relevant change, one modified\n    bar.unlink()\n    stats = dvc.checkout(str(bar))\n    assert stats == empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    # No changes inside data/sub\n    assert dvc.checkout(str(sub_dir)) == empty_checkout\n\n    # Relevant change, one modified\n    sub_dir_file.unlink()\n    stats = dvc.checkout(str(sub_dir))\n    assert stats == empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert len(stats[\"modified\"]) == 1\n\n    stats = dvc.checkout(str(data_dir / \"empty_sub_dir\"))\n    assert stats == 
empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"deleted\": 1},\n    }\n\n    assert dvc.checkout(str(data_dir)) == empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    # Everything is in place, no action taken\n    assert dvc.checkout(str(data_dir)) == empty_checkout\n\n\ndef test_checkout_partial_subdir(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"sub_dir\": {\"bar\": \"bar\", \"baz\": \"baz\"}}})\n\n    data_dir = tmp_dir / \"data\"\n    sub_dir = data_dir / \"sub_dir\"\n    sub_dir_bar = sub_dir / \"baz\"\n\n    shutil.rmtree(sub_dir)\n    assert dvc.checkout(str(sub_dir)) == empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 2},\n    }\n    assert data_dir.read_text() == {\n        \"foo\": \"foo\",\n        \"sub_dir\": {\"bar\": \"bar\", \"baz\": \"baz\"},\n    }\n\n    sub_dir_bar.unlink()\n    assert dvc.checkout(str(sub_dir_bar)) == empty_checkout | {\n        \"modified\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert data_dir.read_text() == {\n        \"foo\": \"foo\",\n        \"sub_dir\": {\"bar\": \"bar\", \"baz\": \"baz\"},\n    }\n\n\ndef test_checkout_file(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    assert dvc.checkout(\"foo\") == empty_checkout\n\n    os.unlink(\"foo\")\n    stats = dvc.checkout(\"foo\")\n    assert stats == empty_checkout | {\n        \"added\": [\"foo\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n\ndef test_checkout_dir_compat(tmp_dir, dvc):\n    (stage,) = tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\"}})\n    tmp_dir.gen(\n        \"data.dvc\",\n        textwrap.dedent(\n            f\"\"\"\\\n        outs:\n        - md5: {stage.outs[0].hash_info.value}\n          hash: md5\n          path: data\n        \"\"\"\n        ),\n    )\n    
remove(\"data\")\n    assert dvc.checkout() == empty_checkout | {\n        \"added\": [\"data\" + os.sep],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n    assert (tmp_dir / \"data\").read_text() == {\"foo\": \"foo\"}\n\n\ndef test_checkout_cleanup_properly_on_untracked_nested_directories(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"datasets\": {\"dir1\": {\"file1\": \"file1\"}}})\n    tmp_dir.gen({\"datasets\": {\"dir2\": {\"dir3\": {\"file2\": \"file2\"}}}})\n\n    assert dvc.checkout(force=True) == empty_checkout | {\n        \"modified\": [\"datasets\" + os.sep],\n        \"stats\": empty_stats | {\"deleted\": 3},\n    }\n\n    assert (tmp_dir / \"datasets\").read_text() == {\"dir1\": {\"file1\": \"file1\"}}\n\n\ndef test_checkout_loads_specific_file(tmp_dir, dvc, mocker):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.dvc_gen(\"bar\", \"bar\")\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / \"foo\").unlink()\n\n    f = SingleStageFile(dvc, \"foo.dvc\")\n\n    spy = mocker.spy(FileMixin, \"_load\")\n    assert dvc.checkout(\"foo.dvc\") == empty_checkout | {\n        \"added\": [\"foo\"],\n        \"stats\": empty_stats | {\"added\": 1},\n    }\n\n    spy.assert_called_with(f)\n    assert (tmp_dir / \"foo\").exists()\n    assert not (tmp_dir / \"bar\").exists()\n"
  },
  {
    "path": "tests/func/test_cli.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.cli import DvcParserError, parse_args\nfrom dvc.cli.command import CmdBase\nfrom dvc.commands.add import CmdAdd\nfrom dvc.commands.checkout import CmdCheckout\nfrom dvc.commands.config import CmdConfig\nfrom dvc.commands.data_sync import CmdDataPull, CmdDataPush\nfrom dvc.commands.init import CmdInit\nfrom dvc.commands.remove import CmdRemove\nfrom dvc.commands.repro import CmdRepro\nfrom dvc.commands.status import CmdDataStatus\nfrom dvc.exceptions import NotDvcRepoError\n\n\ndef test_argparse(dvc):\n    args = parse_args([\"init\"])\n    assert isinstance(args.func(args), CmdInit)\n\n\ndef test_pull(dvc):\n    args = parse_args([\"pull\"])\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdDataPull)\n\n    cmd.repo.close()\n\n\ndef test_push(dvc):\n    args = parse_args([\"push\"])\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdDataPush)\n\n    cmd.repo.close()\n\n\ndef test_status(dvc):\n    args = parse_args([\"status\"])\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdDataStatus)\n\n    cmd.repo.close()\n\n\ndef test_repro(dvc):\n    target1 = \"1\"\n    target2 = \"2\"\n\n    args = parse_args(\n        [\"repro\", target1, target2, \"-f\", \"--force\", \"-s\", \"--single-item\"]\n    )\n\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdRepro)\n    assert args.targets == [target1, target2]\n    assert args.force\n    assert args.single_item\n\n    cmd.repo.close()\n\n\ndef test_remove(dvc):\n    target1 = \"1\"\n    target2 = \"2\"\n\n    args = parse_args([\"remove\", target1, target2])\n\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdRemove)\n    assert args.targets == [target1, target2]\n\n    cmd.repo.close()\n\n\ndef test_add(dvc):\n    target1 = \"1\"\n    target2 = \"2\"\n\n    args = parse_args([\"add\", target1, target2])\n\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdAdd)\n    assert args.targets == [target1, target2]\n\n    
cmd.repo.close()\n\n\ndef test_config_unset(dvc):\n    name = \"section.option\"\n    value = \"1\"\n\n    args = parse_args([\"config\", \"-u\", \"--unset\", name, value])\n\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdConfig)\n    assert args.unset\n    assert args.name == (None, \"section\", \"option\")\n    assert args.value == value\n\n\ndef test_config_list():\n    args = parse_args([\"config\", \"--list\"])\n\n    assert args.list\n    assert args.name is None\n    assert args.value is None\n\n\ndef test_checkout(dvc):\n    args = parse_args([\"checkout\"])\n    cmd = args.func(args)\n    assert isinstance(cmd, CmdCheckout)\n\n    cmd.repo.close()\n\n\ndef test_find_root(dvc):\n    class Cmd(CmdBase):\n        def run(self):\n            pass\n\n    class A:\n        quiet = False\n        verbose = True\n        wait_for_lock = False\n        cd = os.path.pardir\n\n    args = A()\n    with pytest.raises(NotDvcRepoError):\n        Cmd(args)\n\n\ndef test_cd(dvc):\n    class Cmd(CmdBase):\n        def run(self):\n            pass\n\n    class A:\n        quiet = False\n        verbose = True\n        wait_for_lock = False\n        cd = os.path.pardir\n\n    parent_dir = os.path.realpath(os.path.pardir)\n    args = A()\n    with pytest.raises(NotDvcRepoError):\n        Cmd(args)\n    current_dir = os.path.realpath(os.path.curdir)\n    assert parent_dir == current_dir\n\n\ndef test_unknown_command_help(capsys):\n    try:\n        _ = parse_args([\"unknown\"])\n    except DvcParserError:\n        pass\n    captured = capsys.readouterr()\n    output = captured.out\n    try:\n        _ = parse_args([\"--help\"])\n    except SystemExit:\n        pass\n    captured = capsys.readouterr()\n    help_output = captured.out\n    assert output == help_output\n\n\ndef test_unknown_subcommand_help(capsys):\n    sample_subcommand = \"push\"\n    try:\n        _ = parse_args([sample_subcommand, \"--unknown\"])\n    except DvcParserError:\n        pass\n    
captured = capsys.readouterr()\n    output = captured.out\n    try:\n        _ = parse_args([sample_subcommand, \"--help\"])\n    except SystemExit:\n        pass\n    captured = capsys.readouterr()\n    help_output = captured.out\n    assert output == help_output\n"
  },
  {
    "path": "tests/func/test_commit.py",
    "content": "import os\nimport textwrap\n\nimport pytest\n\nfrom dvc.dependency.base import DependencyDoesNotExistError\nfrom dvc.dvcfile import PROJECT_FILE, Lockfile, ProjectFile, SingleStageFile\nfrom dvc.fs import localfs\nfrom dvc.output import OutputDoesNotExistError\nfrom dvc.stage.exceptions import StageCommitError\n\n\ndef test_commit_recursive(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"file\": \"text1\", \"subdir\": {\"file2\": \"text2\"}}})\n    stages = dvc.add(localfs.find(\"dir\"), no_commit=True)\n\n    assert len(stages) == 2\n    assert dvc.status() != {}\n\n    dvc.commit(\"dir\", recursive=True)\n    assert dvc.status() == {}\n\n\ndef test_commit_force(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"file\": \"text1\", \"file2\": \"text2\"}})\n    (stage,) = dvc.add(\"dir\", no_commit=True)\n\n    assert stage.outs[0].changed_cache()\n\n    tmp_dir.gen(\"dir/file\", \"file content modified\")\n\n    assert stage.outs[0].changed_cache()\n\n    with pytest.raises(StageCommitError):\n        dvc.commit(stage.path)\n\n    assert stage.outs[0].changed_cache()\n\n    dvc.commit(stage.path, force=True)\n    assert dvc.status([stage.path]) == {}\n\n\ndef test_commit_preserve_fields(tmp_dir, dvc):\n    text = textwrap.dedent(\n        \"\"\"\\\n        # top comment\n        desc: top desc\n        outs:\n        - path: foo # out comment\n          desc: out desc\n          type: mytype\n          labels:\n          - label1\n          - label2\n          meta:\n            key1: value1\n            key2: value2\n          remote: testremote\n          hash: md5\n        meta: some metadata\n    \"\"\"\n    )\n    tmp_dir.gen(\"foo.dvc\", text)\n    tmp_dir.dvc_gen(\"foo\", \"foo\", commit=False)\n    dvc.commit(\"foo\")\n    assert (tmp_dir / \"foo.dvc\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        # top comment\n        desc: top desc\n        outs:\n        - path: foo # out comment\n          desc: out desc\n          type: mytype\n   
       labels:\n          - label1\n          - label2\n          meta:\n            key1: value1\n            key2: value2\n          remote: testremote\n          hash: md5\n          md5: acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n        meta: some metadata\n    \"\"\"\n    )\n\n\ndef test_commit_with_deps(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    (foo_stage,) = dvc.add(\"foo\", no_commit=True)\n    assert foo_stage is not None\n    assert len(foo_stage.outs) == 1\n\n    stage = run_copy(\"foo\", \"file\", no_commit=True, name=\"copy\")\n    assert stage is not None\n    assert len(stage.outs) == 1\n\n    assert foo_stage.outs[0].changed_cache()\n    assert stage.outs[0].changed_cache()\n\n    dvc.commit(stage.path, with_deps=True)\n    assert not foo_stage.outs[0].changed_cache()\n    assert not stage.outs[0].changed_cache()\n\n\ndef test_commit_changed_md5(tmp_dir, dvc):\n    tmp_dir.gen({\"file\": \"file content\"})\n    (stage,) = dvc.add(\"file\", no_commit=True)\n\n    stage_file_content = (tmp_dir / stage.path).parse()\n    stage_file_content[\"md5\"] = \"1111111111\"\n    (tmp_dir / stage.path).dump(stage_file_content)\n\n    with pytest.raises(StageCommitError):\n        dvc.commit(stage.path)\n\n    dvc.commit(stage.path, force=True)\n    assert \"md5\" not in (tmp_dir / stage.path).parse()\n\n\ndef test_commit_no_exec(tmp_dir, dvc):\n    tmp_dir.gen({\"dep\": \"dep\", \"out\": \"out\"})\n    stage = dvc.run(name=\"my\", cmd=\"mycmd\", deps=[\"dep\"], outs=[\"out\"], no_exec=True)\n\n    assert dvc.status(stage.path)\n    dvc.commit(stage.path, force=True)\n    assert dvc.status(stage.path) == {}\n\n\ndef test_commit_granular_output(tmp_dir, dvc):\n    dvc.run(\n        name=\"mystage\",\n        cmd=[\n            \"python -c \\\"open('foo', 'wb').write(b'foo\\\\n')\\\"\",\n            \"python -c \\\"open('bar', 'wb').write(b'bar\\\\n')\\\"\",\n        ],\n        outs=[\"foo\", \"bar\"],\n        no_commit=True,\n   
 )\n\n    cache = tmp_dir / \".dvc\" / \"cache\" / \"files\" / \"md5\"\n    assert not list(cache.glob(\"*/*\"))\n\n    dvc.commit(\"foo\")\n    assert list(cache.glob(\"*/*\")) == [cache / \"d3\" / \"b07384d113edec49eaa6238ad5ff00\"]\n\n\ndef test_commit_granular_output_file(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.add(\"foo\", no_commit=True)\n    dvc.commit(\"foo\")\n    assert dvc.status() == {}\n\n\ndef test_commit_granular_output_dir(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"data\": {\n                \"foo\": \"foo\",\n                \"bar\": \"bar\",\n                \"subdir\": {\"subfoo\": \"subfoo\", \"subbar\": \"subbar\"},\n            }\n        }\n    )\n    dvc.add(\"data\", no_commit=True)\n    dvc.commit(\"data\")\n    assert dvc.status() == {}\n\n\ndef test_commit_granular_dir(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"data\": {\n                \"foo\": \"foo\",\n                \"bar\": \"bar\",\n                \"subdir\": {\"subfoo\": \"subfoo\", \"subbar\": \"subbar\"},\n            }\n        }\n    )\n    dvc.add(\"data\", no_commit=True)\n\n    cache = tmp_dir / \".dvc\" / \"cache\" / \"files\" / \"md5\"\n\n    assert set(cache.glob(\"*/*\")) == set()\n\n    dvc.commit(os.path.join(\"data\", \"foo\"))\n    assert set(cache.glob(\"*/*\")) == {\n        cache / \"1a\" / \"ca2c799df82929bbdd976557975546.dir\",\n        cache / \"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\",\n    }\n\n    dvc.commit(os.path.join(\"data\", \"subdir\"))\n    assert set(cache.glob(\"*/*\")) == {\n        cache / \"1a\" / \"ca2c799df82929bbdd976557975546.dir\",\n        cache / \"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\",\n        cache / \"4c\" / \"e8d2a2cf314a52fa7f315ca37ca445\",\n        cache / \"68\" / \"dde2c3c4e7953c2290f176bbdc9a54\",\n    }\n\n    dvc.commit(os.path.join(\"data\"))\n    assert set(cache.glob(\"*/*\")) == {\n        cache / \"1a\" / \"ca2c799df82929bbdd976557975546.dir\",\n        cache / 
\"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\",\n        cache / \"4c\" / \"e8d2a2cf314a52fa7f315ca37ca445\",\n        cache / \"68\" / \"dde2c3c4e7953c2290f176bbdc9a54\",\n        cache / \"37\" / \"b51d194a7513e45b56f6524f2d51f2\",\n    }\n\n\ndef test_commit_no_exec_missing_dep(tmp_dir, dvc):\n    stage = dvc.run(name=\"my\", cmd=\"mycmd\", deps=[\"dep\"], outs=[\"out\"], no_exec=True)\n    assert dvc.status(stage.path)\n\n    with pytest.raises(DependencyDoesNotExistError):\n        dvc.commit(stage.path, force=True)\n\n\ndef test_commit_no_exec_missing_out(tmp_dir, dvc):\n    stage = dvc.run(name=\"my\", cmd=\"mycmd\", outs=[\"out\"], no_exec=True)\n    assert dvc.status(stage.path)\n\n    with pytest.raises(OutputDoesNotExistError):\n        dvc.commit(stage.path, force=True)\n\n\ndef test_commit_pipeline_stage(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", no_commit=True, name=\"copy-foo-bar\")\n    assert dvc.status(stage.addressing)\n    assert dvc.commit(stage.addressing, force=True) == [stage]\n    assert not dvc.status(stage.addressing)\n\n    # just to confirm different variants work\n    assert dvc.commit(f\":{stage.addressing}\") == [stage]\n    assert dvc.commit(f\"{PROJECT_FILE}:{stage.addressing}\") == [stage]\n    assert dvc.commit(PROJECT_FILE) == [stage]\n\n\ndef test_imported_entries_unchanged(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"file content\", \"initial commit\")\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"file\")\n\n    assert stage.changed_entries() == ([], [], None)\n\n\ndef test_commit_updates_to_cloud_versioning_dir(tmp_dir, dvc):\n    data_dvc = tmp_dir / \"data.dvc\"\n    data_dvc.dump(\n        {\n            \"outs\": [\n                {\n                    \"path\": \"data\",\n                    \"hash\": \"md5\",\n                    \"files\": [\n                        {\n                            \"size\": 3,\n     
                       \"version_id\": \"WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h\",\n                            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"relpath\": \"bar\",\n                        },\n                        {\n                            \"size\": 3,\n                            \"version_id\": \"0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0\",\n                            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                            \"relpath\": \"foo\",\n                        },\n                    ],\n                }\n            ]\n        }\n    )\n\n    data = tmp_dir / \"data\"\n    data.mkdir()\n    (data / \"foo\").write_text(\"foo\")\n    (data / \"bar\").write_text(\"bar2\")\n\n    dvc.commit(\"data\", force=True)\n\n    assert (tmp_dir / \"data.dvc\").parse() == {\n        \"outs\": [\n            {\n                \"path\": \"data\",\n                \"hash\": \"md5\",\n                \"files\": [\n                    {\n                        \"size\": 4,\n                        \"md5\": \"224e2539f52203eb33728acd228b4432\",\n                        \"relpath\": \"bar\",\n                    },\n                    {\n                        \"size\": 3,\n                        \"version_id\": \"0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0\",\n                        \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                        \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                        \"relpath\": \"foo\",\n                    },\n                ],\n            }\n        ]\n    }\n\n\ndef test_commit_dos2unix(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    (tmp_dir / \"foo.dvc\").dump(\n        {\n            \"outs\": [\n                {\"path\": \"foo\", \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\", \"size\": 3},\n 
           ]\n        }\n    )\n    legacy_content = (tmp_dir / \"foo.dvc\").read_text()\n    assert \"hash: md5\" not in legacy_content\n\n    dvc.commit(\"foo.dvc\", force=True)\n    assert (tmp_dir / \"foo.dvc\").read_text() == legacy_content\n\n    tmp_dir.gen(\"foo\", \"modified\")\n    dvc.commit(\"foo.dvc\", force=True)\n    content = (tmp_dir / \"foo.dvc\").read_text()\n    assert \"hash: md5\" in content\n\n\ndef test_commit_multiple_files(tmp_dir, dvc, mocker):\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    stages = dvc.add([\"foo\", \"bar\"], no_commit=True)\n    test1_stage = dvc.stage.add(name=\"test\", cmd=\"echo test\", deps=[\"foo\"])\n    test2_stage = dvc.stage.add(name=\"test2\", cmd=\"echo test2\", deps=[\"foo\"])\n\n    subdir = tmp_dir / \"subdir\"\n    subdir.mkdir()\n    with subdir.chdir():\n        bar_relpath = os.path.relpath(tmp_dir / \"bar\", subdir)\n        test3_stage = dvc.stage.add(name=\"test3\", cmd=\"echo test3\", deps=[bar_relpath])\n\n    pointerfile_spy = mocker.spy(SingleStageFile, \"dump_stages\")\n    projectfile_spy = mocker.spy(ProjectFile, \"dump_stages\")\n    lockfile_spy = mocker.spy(Lockfile, \"dump_stages\")\n\n    assert set(dvc.commit(force=True)) == {\n        *stages,\n        test1_stage,\n        test2_stage,\n        test3_stage,\n    }\n    pointerfile_spy.assert_has_calls(\n        [\n            mocker.call(stages[0].dvcfile, [stages[0]], update_pipeline=False),\n            mocker.call(stages[1].dvcfile, [stages[1]], update_pipeline=False),\n        ],\n        any_order=True,\n    )\n    projectfile_spy.assert_has_calls(\n        [\n            mocker.call(\n                test1_stage.dvcfile, [test1_stage, test2_stage], update_pipeline=False\n            ),\n            mocker.call(test3_stage.dvcfile, [test3_stage], update_pipeline=False),\n        ],\n        any_order=True,\n    )\n    lockfile_spy.assert_has_calls(\n        [\n            mocker.call(test1_stage.dvcfile._lockfile, 
[test1_stage, test2_stage]),\n            mocker.call(test3_stage.dvcfile._lockfile, [test3_stage]),\n        ],\n        any_order=True,\n    )\n    assert dvc.status() == {}\n"
  },
  {
    "path": "tests/func/test_config.py",
    "content": "import os\nimport textwrap\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.config import Config, ConfigError\n\n\ndef test_config_set(tmp_dir, dvc):\n    assert main([\"config\", \"core.analytics\", \"false\"]) == 0\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n            analytics = false\n        \"\"\"\n    )\n    assert not (tmp_dir / \".dvc\" / \"config.local\").exists()\n\n    assert main([\"config\", \"core.analytics\", \"true\"]) == 0\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n            analytics = true\n        \"\"\"\n    )\n    assert not (tmp_dir / \".dvc\" / \"config.local\").exists()\n\n    assert main([\"config\", \"core.analytics\", \"--unset\"]) == 0\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n        \"\"\"\n    )\n    assert not (tmp_dir / \".dvc\" / \"config.local\").exists()\n\n\ndef test_config_set_local(tmp_dir, dvc):\n    assert main([\"config\", \"core.analytics\", \"false\", \"--local\"]) == 0\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n        \"\"\"\n    )\n    assert (tmp_dir / \".dvc\" / \"config.local\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            analytics = false\n        \"\"\"\n    )\n\n    assert main([\"config\", \"core.analytics\", \"true\", \"--local\"]) == 0\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n        \"\"\"\n    )\n    assert (tmp_dir / \".dvc\" / \"config.local\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            analytics = true\n        \"\"\"\n    )\n\n   
 assert main([\"config\", \"core.analytics\", \"--unset\", \"--local\"]) == 0\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n        \"\"\"\n    )\n    assert (tmp_dir / \".dvc\" / \"config.local\").read_text() == \"\\n\"\n\n\ndef test_config_set_in_non_dvc_repo(tmp_dir, caplog):\n    assert main([\"config\", \"core.analytics\", \"true\"]) != 0\n    out = caplog.text\n    assert \"Not inside a DVC repo\" in out\n\n\n@pytest.mark.parametrize(\n    \"args, ret, msg\",\n    [\n        ([\"core.analytics\"], 0, \"False\"),\n        ([\"core.remote\"], 0, \"myremote\"),\n        ([\"remote.myremote.profile\"], 0, \"treeverse\"),\n        ([\"remote.myremote.profile\", \"--local\"], 0, \"treeverse\"),\n        (\n            [\"remote.myremote.profile\", \"--project\"],\n            251,\n            \"option 'profile' doesn't exist\",\n        ),\n        ([\"remote.other.url\"], 0, \"gs://bucket/path\"),\n        ([\"remote.other.url\", \"--local\"], 0, \"gs://bucket/path\"),\n        (\n            [\"remote.other.url\", \"--project\"],\n            251,\n            \"remote 'other' doesn't exist\",\n        ),\n    ],\n)\ndef test_config_get(tmp_dir, dvc, capsys, caplog, args, ret, msg):\n    (tmp_dir / \".dvc\" / \"config\").write_text(\n        textwrap.dedent(\n            \"\"\"\\\n        [core]\n            no_scm = true\n            analytics = False\n            remote = myremote\n        ['remote \"myremote\"']\n            url = s3://bucket/path\n            region = us-east-2\n        \"\"\"\n        )\n    )\n    (tmp_dir / \".dvc\" / \"config.local\").write_text(\n        textwrap.dedent(\n            \"\"\"\\\n        ['remote \"myremote\"']\n            profile = treeverse\n        ['remote \"other\"']\n            url = gs://bucket/path\n        \"\"\"\n        )\n    )\n\n    assert main([\"config\", *args]) == ret\n    text = caplog.text if ret else 
capsys.readouterr()[0]\n    assert msg in text\n\n\n@pytest.mark.parametrize(\n    \"args, ret\",\n    [\n        ([\"--local\", \"core.remote\"], 251),\n        ([\"--project\", \"core.remote\"], 251),\n        ([\"core.remote\"], 0),\n    ],\n)\ndef test_config_get_in_non_dvc_repo(tmp_dir, caplog, args, ret):\n    assert main([\"config\", *args]) == ret\n    if ret != 0:\n        out = caplog.text\n        assert \"Not inside a DVC repo\" in out\n\n\ndef test_config_list(tmp_dir, dvc, capsys):\n    (tmp_dir / \".dvc\" / \"config\").write_text(\n        textwrap.dedent(\n            \"\"\"\\\n        [core]\n            no_scm = true\n            analytics = False\n            remote = myremote\n        ['remote \"myremote\"']\n            url = s3://bucket/path\n            region = us-east-2\n        \"\"\"\n        )\n    )\n    (tmp_dir / \".dvc\" / \"config.local\").write_text(\n        textwrap.dedent(\n            \"\"\"\\\n        ['remote \"myremote\"']\n            profile = treeverse\n            access_key_id = abcde\n            secret_access_key = 123456\n        ['remote \"other\"']\n            url = gs://bucket/path\n        \"\"\"\n        )\n    )\n\n    assert main([\"config\", \"--list\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert \"remote.myremote.url=s3://bucket/path\" in out\n    assert \"remote.myremote.region=us-east-2\" in out\n    assert \"remote.myremote.profile=treeverse\" in out\n    assert \"remote.myremote.access_key_id=abcde\" in out\n    assert \"remote.myremote.secret_access_key=123456\" in out\n    assert \"remote.other.url=gs://bucket/path\" in out\n    assert \"core.analytics=False\" in out\n    assert \"core.no_scm=true\" in out\n    assert \"core.remote=myremote\" in out\n\n\n@pytest.mark.parametrize(\n    \"args, ret\",\n    [\n        ([\"--list\", \"--local\"], 251),\n        ([\"--list\", \"--project\"], 251),\n        ([\"--list\"], 0),\n    ],\n)\ndef test_config_list_in_non_dvc_repo(tmp_dir, caplog, args, 
ret):\n    assert main([\"config\", *args]) == ret\n    if ret != 0:\n        out = caplog.text\n        assert \"Not inside a DVC repo\" in out\n\n\n@pytest.mark.parametrize(\n    \"args\", [[\"core.analytics\"], [\"core.analytics\", \"false\"], [\"--unset\"]]\n)\ndef test_list_bad_args(tmp_dir, dvc, caplog, args):\n    caplog.clear()\n    assert main([\"config\", \"--list\", *args]) == 1\n    assert (\n        \"-l/--list can't be used together with any of these options: \"\n        \"-u/--unset, name, value\" in caplog.text\n    )\n\n\ndef test_set_invalid_key(dvc):\n    with pytest.raises(ConfigError, match=r\"extra keys not allowed\"):\n        with dvc.config.edit() as conf:\n            conf[\"core\"][\"invalid_key\"] = \"value\"\n\n\ndef test_merging_two_levels(dvc):\n    with dvc.config.edit() as conf:\n        conf[\"remote\"][\"test\"] = {\"url\": \"ssh://example.com\"}\n\n    with pytest.raises(ConfigError, match=r\"expected 'url' for dictionary value\"):\n        with dvc.config.edit(\"global\") as conf:\n            conf[\"remote\"][\"test\"] = {\"password\": \"1\"}\n\n    with dvc.config.edit(\"local\") as conf:\n        conf[\"remote\"][\"test\"] = {\"password\": \"1\"}\n\n    assert dvc.config[\"remote\"][\"test\"] == {\n        \"url\": \"ssh://example.com\",\n        \"password\": \"1\",\n        \"verify\": False,\n    }\n\n\ndef test_config_loads_without_error_for_non_dvc_repo(tmp_dir):\n    # regression testing for https://github.com/treeverse/dvc/issues/3328\n    Config.from_cwd(validate=True)\n\n\n@pytest.mark.parametrize(\n    \"field, remote_url\",\n    [\n        (\"credentialpath\", \"s3://mybucket/my/path\"),\n        (\"credentialpath\", \"gs://my-bucket/path\"),\n        (\"keyfile\", \"ssh://user@example.com:1234/path/to/dir\"),\n        (\"cert_path\", \"webdavs://example.com/files/USERNAME/\"),\n        (\"key_path\", \"webdavs://example.com/files/USERNAME/\"),\n        (\"gdrive_service_account_json_file_path\", 
\"gdrive://root/test\"),\n        (\"gdrive_user_credentials_file\", \"gdrive://root/test\"),\n    ],\n)\ndef test_load_relative_paths(dvc, field, remote_url):\n    # set field to test\n    with dvc.config.edit() as conf:\n        conf[\"remote\"][\"test\"] = {\"url\": remote_url, field: \"file.txt\"}\n\n    # check if written paths are correct\n    dvc_dir = dvc.config.dvc_dir\n    assert dvc.config[\"remote\"][\"test\"][field] == os.path.abspath(\n        os.path.join(dvc_dir, \"..\", \"file.txt\")\n    )\n\n    # load config and check that it contains what we expect\n    # (relative paths are evaluated correctly)\n    cfg = Config(dvc_dir)\n    assert cfg[\"remote\"][\"test\"][field] == os.path.abspath(\n        os.path.join(dvc_dir, \"..\", \"file.txt\")\n    )\n\n\ndef test_config_gdrive_fields(tmp_dir, dvc):\n    with dvc.config.edit() as conf:\n        conf[\"remote\"][\"test\"] = {\n            \"url\": \"gdrive://root/test\",\n            \"profile\": \"myprofile\",\n        }\n\n    Config.from_cwd(validate=True)\n\n\ndef test_config_remote(tmp_dir, dvc, capsys):\n    (tmp_dir / \".dvc\" / \"config\").write_text(\n        \"['remote \\\"myremote\\\"']\\n  url = s3://bucket/path\\n  region = myregion\\n\"\n    )\n\n    assert main([\"config\", \"remote.myremote.url\"]) == 0\n    out, _ = capsys.readouterr()\n    assert \"s3://bucket/path\" in out\n\n    assert main([\"config\", \"remote.myremote.region\"]) == 0\n    out, _ = capsys.readouterr()\n    assert \"myregion\" in out\n\n\ndef test_config_show_origin_single(tmp_dir, dvc, capsys):\n    (tmp_dir / \".dvc\" / \"config\").write_text(\n        \"['remote \\\"myremote\\\"']\\n  url = s3://bucket/path\\n  region = myregion\\n\"\n    )\n\n    assert main([\"config\", \"--show-origin\", \"--project\", \"remote.myremote.url\"]) == 0\n    out, _ = capsys.readouterr()\n    assert \"{}\\t{}\\n\".format(os.path.join(\".dvc\", \"config\"), \"s3://bucket/path\") in out\n\n    assert main([\"config\", 
\"--show-origin\", \"--local\", \"remote.myremote.url\"]) == 251\n\n    assert main([\"config\", \"--list\", \"--project\", \"--show-origin\"]) == 0\n    out, _ = capsys.readouterr()\n    assert (\n        \"{}\\t{}\\n\".format(\n            os.path.join(\".dvc\", \"config\"),\n            \"remote.myremote.url=s3://bucket/path\",\n        )\n        in out\n    )\n\n\ndef test_config_show_origin_merged(tmp_dir, dvc, capsys):\n    (tmp_dir / \".dvc\" / \"config\").write_text(\n        \"['remote \\\"myremote\\\"']\\n  url = s3://bucket/path\\n  region = myregion\\n\"\n    )\n\n    (tmp_dir / \".dvc\" / \"config.local\").write_text(\n        \"['remote \\\"myremote\\\"']\\n  timeout = 100\\n\"\n    )\n\n    assert main([\"config\", \"--list\", \"--show-origin\"]) == 0\n    out, _ = capsys.readouterr()\n    assert (\n        \"{}\\t{}\\n\".format(\n            os.path.join(\".dvc\", \"config\"),\n            \"remote.myremote.url=s3://bucket/path\",\n        )\n        in out\n    )\n\n    assert (\n        \"{}\\t{}\\n\".format(\n            os.path.join(\".dvc\", \"config.local\"), \"remote.myremote.timeout=100\"\n        )\n        in out\n    )\n"
  },
  {
    "path": "tests/func/test_daemon.py",
    "content": "import json\nimport os\nimport re\nimport subprocess\nimport sys\nfrom collections import defaultdict\nfrom collections.abc import Iterator\nfrom contextlib import contextmanager, suppress\nfrom http.server import BaseHTTPRequestHandler, HTTPServer\nfrom pathlib import Path\nfrom threading import Thread\nfrom typing import ClassVar\n\nimport psutil\nimport pytest\n\nfrom dvc import version_tuple\nfrom dvc.daemon import _get_dvc_args\nfrom dvc.env import (\n    DVC_ANALYTICS_ENDPOINT,\n    DVC_DAEMON_LOGFILE,\n    DVC_NO_ANALYTICS,\n    DVC_UPDATER_ENDPOINT,\n)\nfrom dvc.updater import Updater\n\nversion = \".\".join(map(str, version_tuple[:3]))\nUPDATER_INFO = {\n    \"version\": version,\n    \"packages\": {\n        \"linux\": {\n            \"deb\": f\"https://dvc.org/download/linux-deb/dvc-{version}\",\n            \"rpm\": f\"https://dvc.org/download/linux-rpm/dvc-{version}\",\n        },\n        \"windows\": {\"exe\": f\"https://dvc.org/download/win/dvc-{version}\"},\n        \"osx\": {\"pkg\": f\"https://dvc.org/download/osx/dvc-{version}\"},\n    },\n}\n\nUPDATER_INFO_STR = json.dumps(UPDATER_INFO).encode(\"utf8\")\n\n\ndef make_request_handler():\n    class RequestHandler(BaseHTTPRequestHandler):\n        # save requests count for each method\n        hits: ClassVar[dict[str, int]] = defaultdict(int)\n\n        def log_message(self, format, *args) -> None:  # noqa: A002\n            super().log_message(format, *args)\n            if length := self.headers.get(\"Content-Length\"):\n                data = self.rfile.read(int(length)).decode(\"utf8\")\n                sys.stderr.write(f\"{data}\\n\")\n\n        def do_POST(self):\n            # analytics endpoint\n            self.hits[\"POST\"] += 1\n            self.send_response(200)\n            super().end_headers()\n\n        def do_GET(self):\n            # updater endpoint\n            self.hits[\"GET\"] += 1\n            self.send_response(200)\n            
self.send_header(\"Content-type\", \"application/json\")\n            self.end_headers()\n            self.wfile.write(UPDATER_INFO_STR)\n\n    return RequestHandler\n\n\n@contextmanager\ndef make_server(port: int = 0) -> Iterator[\"HTTPServer\"]:\n    with HTTPServer((\"localhost\", port), make_request_handler()) as httpd:\n        yield httpd\n\n\n@pytest.fixture\ndef server():\n    with make_server() as httpd:\n        thread = Thread(target=httpd.serve_forever)\n        thread.daemon = True\n        thread.start()\n        try:\n            yield httpd\n        finally:\n            httpd.shutdown()\n\n\ndef test_analytics(tmp_path, server):\n    addr = server.server_address\n    logfile = tmp_path / \"logfile\"\n\n    env = {\n        **os.environ,\n        DVC_DAEMON_LOGFILE: str(logfile),\n        DVC_ANALYTICS_ENDPOINT: \"http://{}:{}\".format(*addr),\n    }\n    env.pop(\"DVC_TEST\", None)\n    env.pop(\"DVC_NO_ANALYTICS\", None)\n    # The `iterative-telemetry` package calls `gh api` to generate a CI id.\n    # This might hang especially on Windows,\n    # possibly due to system load from the running tests.\n    # Removing the GITHUB_ACTIONS env var avoids calling `gh api`.\n    env.pop(\"GITHUB_ACTIONS\", None)\n\n    output = subprocess.check_output(\n        [*_get_dvc_args(), \"config\", \"-l\", \"-vv\"],\n        env=env,\n        text=True,\n    )\n\n    match = re.search(r\".*Saving analytics report to (.*)\", output, flags=re.MULTILINE)\n    assert match, \"no match for the report file\"\n    report_file = match.group(1).strip()\n\n    match = re.search(\n        r\".*Spawned .*analytics.* with pid (.*)\", output, flags=re.MULTILINE\n    )\n    assert match, \"no match for the pid\"\n    pid = int(match.group(1).strip())\n\n    with suppress(psutil.NoSuchProcess):\n        psutil.Process(pid).wait(timeout=10)\n\n    log_contents = logfile.read_text(encoding=\"utf8\")\n    expected_line = (f\"Process {pid} \" if os.name != \"nt\" else \"\") + 
\"exiting with 0\"\n    assert expected_line in log_contents\n\n    assert not os.path.exists(report_file)\n    assert server.RequestHandlerClass.hits == {\"POST\": 1}\n\n\ndef test_updater(tmp_dir, dvc, server):\n    addr = server.server_address\n    logfile = tmp_dir / \"logfile\"\n\n    env = {\n        **os.environ,\n        DVC_DAEMON_LOGFILE: str(logfile),\n        DVC_UPDATER_ENDPOINT: \"http://{}:{}\".format(*addr),\n        # prevent running analytics daemon\n        DVC_NO_ANALYTICS: \"true\",\n    }\n    env.pop(\"DVC_TEST\", None)\n    env.pop(\"CI\", None)\n\n    output = subprocess.check_output(\n        [*_get_dvc_args(), \"version\", \"-vv\"],\n        env=env,\n        text=True,\n    )\n\n    match = re.search(\n        r\".*Spawned .*updater.* with pid (.*)\", output, flags=re.MULTILINE\n    )\n    assert match, \"no match for the pid\"\n    pid = int(match.group(1).strip())\n\n    with suppress(psutil.NoSuchProcess):\n        psutil.Process(pid).wait(timeout=10)\n\n    log_contents = logfile.read_text(encoding=\"utf8\")\n    expected_line = (f\"Process {pid} \" if os.name != \"nt\" else \"\") + \"exiting with 0\"\n    assert expected_line in log_contents\n\n    assert server.RequestHandlerClass.hits == {\"GET\": 1}\n    # check that the file is saved correctly\n    updater_file = Path(dvc.tmp_dir) / Updater.UPDATER_FILE\n    assert json.loads(updater_file.read_text(encoding=\"utf8\")) == UPDATER_INFO\n\n\nif __name__ == \"__main__\":\n    # python -m tests.func.test_daemon [<port>]\n    port = int(sys.argv[1]) if len(sys.argv) >= 2 else 0\n    with make_server(port) as httpd:\n        print(  # noqa:  T201\n            \"Running server on http://{}:{}\".format(*httpd.server_address)\n        )\n        httpd.serve_forever()\n"
  },
  {
    "path": "tests/func/test_data_cloud.py",
    "content": "import logging\nimport os\nimport shutil\nfrom os.path import join\n\nimport pytest\n\nimport dvc_data\nfrom dvc.cli import main\nfrom dvc.dvcfile import FileMixin, SingleStageFile\nfrom dvc.exceptions import CheckoutError\nfrom dvc.repo.open_repo import clean_repos\nfrom dvc.scm import CloneError\nfrom dvc.stage.exceptions import StageNotFound\nfrom dvc.testing.remote_tests import TestRemote  # noqa: F401\nfrom dvc.utils.fs import remove\nfrom dvc_data.hashfile.db import HashFileDB\nfrom dvc_data.hashfile.db.local import LocalHashFileDB\nfrom dvc_data.hashfile.hash_info import HashInfo\nfrom tests.func.test_checkout import empty_checkout, empty_stats\n\nempty_stats = empty_stats | {\"fetched\": 0}\nempty_pull = empty_checkout | {\"stats\": empty_stats}\n\n\ndef test_cloud_cli(tmp_dir, dvc, capsys, remote, mocker):  # noqa: PLR0915\n    jobs = 2\n    args = [\"-v\", \"-j\", str(jobs)]\n\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    cache = stage.outs[0].cache_path\n\n    (stage_dir,) = tmp_dir.dvc_gen(\n        {\n            \"data_dir\": {\n                \"data_sub_dir\": {\"data_sub\": \"data_sub\"},\n                \"data\": \"data\",\n                \"empty\": \"\",\n            }\n        }\n    )\n    assert stage_dir is not None\n    cache_dir = stage_dir.outs[0].cache_path\n\n    # FIXME check status output\n    oids_exist = mocker.spy(LocalHashFileDB, \"oids_exist\")\n\n    assert main([\"push\", *args]) == 0\n    assert capsys.readouterr().out == \"5 files pushed\\n\"\n    assert os.path.exists(cache)\n    assert os.path.isfile(cache)\n    assert os.path.isfile(cache_dir)\n    assert oids_exist.called\n    assert all(\n        _kwargs[\"jobs\"] == jobs for (_args, _kwargs) in oids_exist.call_args_list\n    )\n\n    dvc.cache.local.clear()\n    oids_exist.reset_mock()\n\n    assert main([\"fetch\", *args]) == 0\n    assert capsys.readouterr().out == \"5 files fetched\\n\"\n    assert os.path.exists(cache)\n    assert 
os.path.isfile(cache)\n    assert os.path.isfile(cache_dir)\n    assert oids_exist.called\n    assert all(\n        _kwargs[\"jobs\"] == jobs for (_args, _kwargs) in oids_exist.call_args_list\n    )\n\n    oids_exist.reset_mock()\n\n    assert main([\"pull\", *args]) == 0\n    assert capsys.readouterr().out == \"Everything is up to date.\\n\"\n    assert os.path.exists(cache)\n    assert os.path.isfile(cache)\n    assert os.path.isfile(cache_dir)\n    assert os.path.isfile(\"foo\")\n    assert os.path.isdir(\"data_dir\")\n    assert oids_exist.called\n    assert all(\n        _kwargs[\"jobs\"] == jobs for (_args, _kwargs) in oids_exist.call_args_list\n    )\n\n    with open(cache, encoding=\"utf-8\") as fd:\n        assert fd.read() == \"foo\"\n    assert os.path.isfile(cache_dir)\n\n    # NOTE: http doesn't support gc yet\n    if remote.url.startswith(\"http\"):\n        return\n\n    oids_exist.reset_mock()\n\n    _list_oids_traverse = mocker.spy(HashFileDB, \"_list_oids_traverse\")\n    # NOTE: check if remote gc works correctly on directories\n    assert main([\"gc\", \"-cw\", \"-f\", *args]) == 0\n    assert _list_oids_traverse.called\n    assert all(_kwargs[\"jobs\"] == 2 for (_args, _kwargs) in oids_exist.call_args_list)\n    shutil.move(dvc.cache.local.path, dvc.cache.local.path + \".back\")\n\n    assert main([\"fetch\", *args]) == 0\n    assert capsys.readouterr().out == \"5 files fetched\\n\"\n\n    assert oids_exist.called\n    assert all(\n        _kwargs[\"jobs\"] == jobs for (_args, _kwargs) in oids_exist.call_args_list\n    )\n\n    oids_exist.reset_mock()\n    assert main([\"pull\", \"-f\", *args]) == 0\n    assert capsys.readouterr().out == \"Everything is up to date.\\n\"\n    assert os.path.exists(cache)\n    assert os.path.isfile(cache)\n    assert os.path.isfile(cache_dir)\n    assert os.path.isfile(\"foo\")\n    assert os.path.isdir(\"data_dir\")\n    assert oids_exist.called\n    assert all(\n        _kwargs[\"jobs\"] == jobs for (_args, 
_kwargs) in oids_exist.call_args_list\n    )\n\n\ndef test_data_cloud_error_cli(dvc):\n    f = \"non-existing-file\"\n    assert main([\"status\", \"-c\", f])\n    assert main([\"push\", f])\n    assert main([\"pull\", f])\n    assert main([\"fetch\", f])\n\n\ndef test_warn_on_outdated_stage(tmp_dir, dvc, local_remote, caplog):\n    stage = dvc.run(outs=[\"bar\"], cmd=\"echo bar > bar\", name=\"gen-bar\")\n    dvc.push()\n\n    stage.outs[0].hash_info = HashInfo()\n    stage.dump()\n\n    with caplog.at_level(logging.WARNING, logger=\"dvc\"):\n        caplog.clear()\n        assert main([\"status\", \"-c\"]) == 0\n        expected_warning = (\n            \"Output 'bar'(stage: 'gen-bar') is missing version info. \"\n            \"Cache for it will not be collected. \"\n            \"Use `dvc repro` to get your pipeline up to date.\"\n        )\n\n        assert expected_warning in caplog.text\n\n\ndef test_hash_recalculation(mocker, dvc, tmp_dir, local_remote):\n    tmp_dir.gen({\"foo\": \"foo\"})\n    test_file_md5 = mocker.spy(dvc_data.hashfile.hash, \"file_md5\")\n    ret = main([\"config\", \"cache.type\", \"hardlink\"])\n    assert ret == 0\n    ret = main([\"add\", \"foo\"])\n    assert ret == 0\n    ret = main([\"push\"])\n    assert ret == 0\n    assert test_file_md5.mock.call_count == 3\n\n\ndef test_missing_cache(tmp_dir, dvc, local_remote, caplog):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n\n    # purge cache\n    dvc.cache.local.clear()\n\n    header = (\n        \"Some of the cache files do not exist \"\n        \"neither locally nor on remote. 
Missing cache files:\\n\"\n    )\n    foo = \"md5: 37b51d194a7513e45b56f6524f2d51f2\\n\"\n    bar = \"md5: acbd18db4cc2f85cedef654fccc4a4d8\\n\"\n\n    caplog.clear()\n    dvc.push()\n    assert header in caplog.text\n    assert foo in caplog.text\n    assert bar in caplog.text\n\n    caplog.clear()\n    dvc.fetch()\n    assert header in caplog.text\n    assert foo in caplog.text\n    assert bar in caplog.text\n\n    caplog.clear()\n    assert dvc.status(cloud=True) == {\"bar\": \"missing\", \"foo\": \"missing\"}\n    assert header not in caplog.text\n    assert foo not in caplog.text\n    assert bar not in caplog.text\n\n\ndef test_verify_hashes(tmp_dir, scm, dvc, mocker, tmp_path_factory, local_remote):\n    tmp_dir.dvc_gen({\"file\": \"file1 content\"}, commit=\"add file\")\n    tmp_dir.dvc_gen({\"dir\": {\"subfile\": \"file2 content\"}}, commit=\"add dir\")\n    dvc.push()\n\n    # remove artifacts and cache to trigger fetching\n    remove(\"file\")\n    remove(\"dir\")\n    dvc.cache.local.clear()\n\n    hash_spy = mocker.spy(dvc_data.hashfile.hash, \"file_md5\")\n\n    assert dvc.pull() == empty_pull | {\n        \"added\": [\"dir\" + os.sep, \"file\"],\n        \"stats\": empty_stats | {\"fetched\": 3, \"added\": 2},\n    }\n\n    # NOTE: 2 are for index.data_tree building\n    assert hash_spy.call_count == 3\n\n    # Removing cache will invalidate existing state entries\n    dvc.cache.local.clear()\n\n    with dvc.config.edit() as conf:\n        conf[\"remote\"][\"upstream\"][\"verify\"] = True\n\n    assert dvc.pull() == empty_pull | {\"stats\": empty_stats | {\"fetched\": 3}}\n    assert hash_spy.call_count == 10\n\n\n# @pytest.mark.flaky(reruns=3)\n@pytest.mark.parametrize(\"erepo_type\", [\"git_dir\", \"erepo_dir\"])\ndef test_pull_git_imports(request, tmp_dir, dvc, scm, erepo_type):\n    erepo = request.getfixturevalue(erepo_type)\n    with erepo.chdir():\n        erepo.scm_gen({\"dir\": {\"bar\": \"bar\"}}, commit=\"second\")\n        
erepo.scm_gen(\"foo\", \"foo\", commit=\"first\")\n\n    dvc.imp(os.fspath(erepo), \"foo\")\n    dvc.imp(os.fspath(erepo), \"dir\", out=\"new_dir\", rev=\"HEAD~\")\n\n    assert dvc.pull() == empty_pull\n\n    for item in [\"foo\", \"new_dir\"]:\n        remove(item)\n    dvc.cache.local.clear()\n    os.makedirs(dvc.cache.local.path, exist_ok=True)\n    clean_repos()\n\n    assert dvc.pull(force=True) == empty_pull | {\n        \"added\": [\"new_dir\" + os.sep, \"foo\"],\n        \"stats\": empty_stats | {\"fetched\": 2, \"added\": 2},\n    }\n\n    assert (tmp_dir / \"foo\").exists()\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n\n    assert (tmp_dir / \"new_dir\").exists()\n    assert (tmp_dir / \"new_dir\" / \"bar\").read_text() == \"bar\"\n\n\ndef test_pull_external_dvc_imports(tmp_dir, dvc, scm, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir\": {\"bar\": \"bar\"}}, commit=\"second\")\n        erepo_dir.dvc_gen(\"foo\", \"foo\", commit=\"first\")\n\n        os.remove(\"foo\")\n        shutil.rmtree(\"dir\")\n\n    dvc.imp(os.fspath(erepo_dir), \"foo\")\n    dvc.imp(os.fspath(erepo_dir), \"dir\", out=\"new_dir\", rev=\"HEAD~\")\n\n    assert dvc.pull() == empty_pull\n\n    clean([\"foo\", \"new_dir\"], dvc)\n\n    assert dvc.pull(force=True) == empty_pull | {\n        \"added\": [\"new_dir\" + os.sep, \"foo\"],\n        \"stats\": empty_stats | {\"fetched\": 2, \"added\": 2},\n    }\n\n    assert (tmp_dir / \"foo\").exists()\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n\n    assert (tmp_dir / \"new_dir\").exists()\n    assert (tmp_dir / \"new_dir\" / \"bar\").read_text() == \"bar\"\n\n\ndef test_pull_partial_import(tmp_dir, dvc, local_workspace):\n    local_workspace.gen(\"file\", \"file content\")\n    dst = tmp_dir / \"file\"\n    stage = dvc.imp_url(\"remote://workspace/file\", os.fspath(dst), no_download=True)\n\n    result = dvc.pull(\"file\")\n    assert result == empty_checkout | {\n        \"added\": 
[\"file\"],\n        \"stats\": empty_stats | {\"fetched\": 1, \"added\": 1},\n    }\n    assert dst.exists()\n\n    assert stage.outs[0].get_hash().value == \"d10b4c3ff123b26dc068d43a8bef2d23\"\n\n\ndef test_pull_partial_import_missing(tmp_dir, dvc, local_workspace):\n    local_workspace.gen(\"file\", \"file content\")\n    dst = tmp_dir / \"file\"\n    dvc.imp_url(\"remote://workspace/file\", os.fspath(dst), no_download=True)\n\n    (local_workspace / \"file\").unlink()\n    with pytest.raises(CheckoutError) as exc:\n        dvc.pull(\"file\")\n    assert exc.value.result == empty_pull | {\"failed\": [\"file\"]}\n    assert not dst.exists()\n\n\ndef test_pull_partial_import_modified(tmp_dir, dvc, local_workspace):\n    local_workspace.gen(\"file\", \"file content\")\n    dst = tmp_dir / \"file\"\n    dvc.imp_url(\"remote://workspace/file\", os.fspath(dst), no_download=True)\n\n    local_workspace.gen(\"file\", \"updated file content\")\n    with pytest.raises(CheckoutError) as exc:\n        dvc.pull(\"file\")\n    assert exc.value.result == empty_pull | {\"failed\": [\"file\"]}\n    assert not dst.exists()\n\n\ndef test_pull_external_dvc_imports_mixed(tmp_dir, dvc, scm, erepo_dir, local_remote):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo\", commit=\"first\")\n        os.remove(\"foo\")\n\n    # imported: foo\n    dvc.imp(os.fspath(erepo_dir), \"foo\")\n\n    # local-object: bar\n    tmp_dir.dvc_gen(\"bar\", \"bar\")\n    dvc.push(\"bar\")\n\n    clean([\"foo\", \"bar\"], dvc)\n\n    assert dvc.pull() == empty_pull | {\n        \"added\": [\"bar\", \"foo\"],\n        \"stats\": empty_stats | {\"fetched\": 2, \"added\": 2},\n    }\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"bar\").read_text() == \"bar\"\n\n\ndef clean(outs, dvc=None):\n    if dvc:\n        dvc.cache.local.clear()\n    for path in outs:\n        remove(path)\n    if dvc:\n        clean_repos()\n\n\ndef recurse_list_dir(d):\n    return 
[\n        os.path.join(root, f) for root, _, filenames in os.walk(d) for f in filenames\n    ]\n\n\ndef test_dvc_pull_pipeline_stages(tmp_dir, dvc, run_copy, local_remote):\n    (stage0,) = tmp_dir.dvc_gen(\"bar\", \"bar\")\n    stage1 = run_copy(\"bar\", \"foo\", name=\"copy-bar-foo\")\n    stage2 = run_copy(\"foo\", \"foobar\", name=\"copy-foo-foobar\")\n    dvc.push()\n\n    outs = [\"bar\", \"foo\", \"foobar\"]\n\n    clean(outs, dvc)\n    assert dvc.pull() == empty_pull | {\n        \"added\": outs,\n        \"stats\": empty_stats | {\"fetched\": 1, \"added\": 3},\n    }\n    assert all((tmp_dir / file).exists() for file in outs)\n\n    for out, stage in zip(outs, [stage0, stage1, stage2]):\n        for target in [stage.addressing, out]:\n            clean(outs, dvc)\n            stats = dvc.pull([target])\n            assert stats == empty_pull | {\n                \"added\": [out],\n                \"stats\": empty_stats | {\"fetched\": 1, \"added\": 1},\n            }\n            assert os.path.exists(out)\n            assert not any(os.path.exists(out) for out in set(outs) - {out})\n\n    clean(outs, dvc)\n    stats = dvc.pull([stage2.addressing], with_deps=True)\n    assert stats == empty_pull | {\n        \"added\": outs,\n        \"stats\": empty_stats | {\"fetched\": 1, \"added\": 3},\n    }\n\n    clean(outs, dvc)\n    stats = dvc.pull([os.curdir], recursive=True)\n    assert stats == empty_pull | {\n        \"added\": outs,\n        \"stats\": empty_stats | {\"fetched\": 1, \"added\": 3},\n    }\n\n\ndef test_pipeline_file_target_ops(tmp_dir, dvc, run_copy, local_remote):\n    path = local_remote.url\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    tmp_dir.dvc_gen(\"lorem\", \"lorem\")\n    run_copy(\"lorem\", \"lorem2\", name=\"copy-lorem-lorem2\")\n\n    tmp_dir.dvc_gen(\"ipsum\", \"ipsum\")\n    run_copy(\"ipsum\", \"baz\", name=\"copy-ipsum-baz\")\n\n    outs = [\"foo\", \"lorem\", \"ipsum\", \"baz\", \"lorem2\"]\n\n    
remove(dvc.stage_cache.cache_dir)\n\n    assert dvc.push() == 3\n\n    outs = [\"foo\", \"lorem\", \"ipsum\", \"baz\", \"lorem2\"]\n\n    # each one's a copy of other, hence 3\n    assert len(recurse_list_dir(path)) == 3\n\n    clean(outs, dvc)\n    assert dvc.pull([\"dvc.yaml\"]) == empty_pull | {\n        \"added\": [\"baz\", \"lorem2\"],\n        \"stats\": empty_stats | {\"fetched\": 2, \"added\": 2},\n    }\n\n    clean(outs, dvc)\n    assert dvc.pull() == empty_pull | {\n        \"added\": [\"baz\", \"foo\", \"ipsum\", \"lorem\", \"lorem2\"],\n        \"stats\": empty_stats | {\"fetched\": 3, \"added\": 5},\n    }\n\n    # clean everything in remote and push\n    from dvc.testing.tmp_dir import TmpDir\n\n    clean(TmpDir(path).iterdir())\n    assert dvc.push([\"dvc.yaml:copy-ipsum-baz\"]) == 1\n    assert len(recurse_list_dir(path)) == 1\n\n    clean(TmpDir(path).iterdir())\n    assert dvc.push([\"dvc.yaml\"]) == 2\n    assert len(recurse_list_dir(path)) == 2\n\n    with pytest.raises(StageNotFound):\n        dvc.push([\"dvc.yaml:StageThatDoesNotExist\"])\n\n    with pytest.raises(StageNotFound):\n        dvc.pull([\"dvc.yaml:StageThatDoesNotExist\"])\n\n\n@pytest.mark.parametrize(\n    \"fs, msg\",\n    [\n        ({\"foo\": \"foo\", \"bar\": \"bar\"}, \"2 files pushed\"),\n        ({\"foo\": \"foo\"}, \"1 file pushed\"),\n        ({}, \"Everything is up to date\"),\n    ],\n)\ndef test_push_stats(tmp_dir, dvc, fs, msg, capsys, local_remote):\n    tmp_dir.dvc_gen(fs)\n\n    main([\"push\"])\n    out, _ = capsys.readouterr()\n    assert msg in out\n\n\n@pytest.mark.parametrize(\n    \"fs, msg\",\n    [\n        ({\"foo\": \"foo\", \"bar\": \"bar\"}, \"2 files fetched\"),\n        ({\"foo\": \"foo\"}, \"1 file fetched\"),\n        ({}, \"Everything is up to date.\"),\n    ],\n)\ndef test_fetch_stats(tmp_dir, dvc, fs, msg, capsys, local_remote):\n    tmp_dir.dvc_gen(fs)\n    dvc.push()\n    clean(list(fs.keys()), dvc)\n\n    main([\"fetch\"])\n    out, _ = 
capsys.readouterr()\n    assert msg in out\n\n\ndef test_pull_stats(tmp_dir, dvc, capsys, local_remote):\n    tmp_dir.dvc_gen(\n        {\n            \"foo\": \"foo\",\n            \"bar\": \"bar\",\n            \"lorem\": \"lorem\",\n            \"dir\": {\"file\": \"file\"},\n            \"ipsum\": \"ipsum\",\n            \"dolor\": \"dolor\",\n        }\n    )\n    dvc.push()\n    clean([\"foo\", \"bar\", \"dir\", \"lorem\"], dvc)\n\n    (tmp_dir / \"ipsum.dvc\").unlink()\n    (tmp_dir / \"bar\").write_text(\"foobar\")\n\n    assert main([\"pull\", \"--force\"]) == 0\n    out, _ = capsys.readouterr()\n    assert out.splitlines() == [\n        \"M\\tbar\".expandtabs(),\n        \"A\\tdir\".expandtabs() + os.sep,\n        \"A\\tfoo\".expandtabs(),\n        \"A\\tlorem\".expandtabs(),\n        \"D\\tipsum\".expandtabs(),\n        \"6 files fetched, 1 file modified, 3 files added and 1 file deleted\",\n    ]\n\n    main([\"pull\"])\n    out, _ = capsys.readouterr()\n    assert out == \"Everything is up to date.\\n\"\n\n\n@pytest.mark.parametrize(\n    \"key,expected\", [(\"all_tags\", 2), (\"all_branches\", 3), (\"all_commits\", 3)]\n)\ndef test_push_pull_all(tmp_dir, scm, dvc, local_remote, key, expected):\n    tmp_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"first\")\n    scm.tag(\"v1\")\n    dvc.remove(\"foo.dvc\")\n    tmp_dir.dvc_gen({\"bar\": \"bar\"}, commit=\"second\")\n    scm.tag(\"v2\")\n    with tmp_dir.branch(\"branch\", new=True):\n        dvc.remove(\"bar.dvc\")\n        tmp_dir.dvc_gen({\"baz\": \"baz\"}, commit=\"branch\")\n\n    assert dvc.push(**{key: True}) == expected\n\n    clean([\"foo\", \"bar\", \"baz\"], dvc)\n    assert dvc.pull(**{key: True}) == empty_pull | {\n        \"added\": [\"bar\", \"foo\"],\n        \"stats\": empty_stats | {\"fetched\": expected, \"added\": 2},\n    }\n\n\ndef test_push_pull_fetch_pipeline_stages(tmp_dir, dvc, run_copy, local_remote):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", 
name=\"copy-foo-bar\")\n\n    assert dvc.push(\"copy-foo-bar\") == 1\n    assert len(recurse_list_dir(local_remote.url)) == 1\n    # pushing everything so as we can check pull/fetch only downloads\n    # from specified targets\n    assert dvc.push() == 0\n    clean([\"foo\", \"bar\"], dvc)\n\n    assert dvc.pull(\"copy-foo-bar\") == empty_pull | {\n        \"added\": [\"bar\"],\n        \"stats\": empty_stats | {\"fetched\": 1, \"added\": 1},\n    }\n    assert (tmp_dir / \"bar\").exists()\n    assert len(recurse_list_dir(dvc.cache.local.path)) == 1\n    clean([\"bar\"], dvc)\n\n    assert dvc.fetch(\"copy-foo-bar\") == 1\n    assert len(recurse_list_dir(dvc.cache.local.path)) == 1\n\n\ndef test_pull_partial(tmp_dir, dvc, local_remote):\n    other_files = {f\"spam{i}\": f\"spam{i}\" for i in range(10)}\n    tmp_dir.dvc_gen({\"foo\": {\"bar\": {\"baz\": \"baz\"}, **other_files}})\n    dvc.push()\n    clean([\"foo\"], dvc)\n\n    stats = dvc.pull(os.path.join(\"foo\", \"bar\"))\n    assert stats == empty_pull | {\n        \"added\": [os.path.join(\"foo\", \"\")],\n        \"stats\": empty_stats | {\"fetched\": 2, \"added\": 1},\n    }\n    assert (tmp_dir / \"foo\").read_text() == {\"bar\": {\"baz\": \"baz\"}}\n\n\ndef test_output_remote(tmp_dir, dvc, make_remote):\n    make_remote(\"default\", default=True)\n    make_remote(\"for_foo\", default=False)\n    make_remote(\"for_data\", default=False)\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.dvc_gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"data\", {\"one\": \"one\", \"two\": \"two\"})\n\n    with (tmp_dir / \"foo.dvc\").modify() as d:\n        d[\"outs\"][0][\"remote\"] = \"for_foo\"\n\n    with (tmp_dir / \"data.dvc\").modify() as d:\n        d[\"outs\"][0][\"remote\"] = \"for_data\"\n\n    dvc.push()\n\n    default = dvc.cloud.get_remote_odb(\"default\")\n    for_foo = dvc.cloud.get_remote_odb(\"for_foo\")\n    for_data = dvc.cloud.get_remote_odb(\"for_data\")\n\n    assert set(default.all()) == 
{\"37b51d194a7513e45b56f6524f2d51f2\"}\n    assert set(for_foo.all()) == {\"acbd18db4cc2f85cedef654fccc4a4d8\"}\n    assert set(for_data.all()) == {\n        \"f97c5d29941bfb1b2fdab0874906ab82\",\n        \"6b18131dc289fd37006705affe961ef8.dir\",\n        \"b8a9f715dbb64fd5c56e7783c6820a61\",\n    }\n\n    clean([\"foo\", \"bar\", \"data\"], dvc)\n\n    assert dvc.pull() == empty_pull | {\n        \"added\": [\"data\" + os.sep, \"bar\", \"foo\"],\n        \"stats\": empty_stats | {\"fetched\": 5, \"added\": 4},\n    }\n\n    assert set(dvc.cache.local.all()) == {\n        \"37b51d194a7513e45b56f6524f2d51f2\",\n        \"acbd18db4cc2f85cedef654fccc4a4d8\",\n        \"f97c5d29941bfb1b2fdab0874906ab82\",\n        \"6b18131dc289fd37006705affe961ef8.dir\",\n        \"b8a9f715dbb64fd5c56e7783c6820a61\",\n    }\n\n\ndef test_target_remote(tmp_dir, dvc, make_remote):\n    make_remote(\"default\", default=True)\n    make_remote(\"myremote\", default=False)\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.dvc_gen(\"data\", {\"one\": \"one\", \"two\": \"two\"})\n\n    dvc.push(remote=\"myremote\")\n\n    default = dvc.cloud.get_remote_odb(\"default\")\n    myremote = dvc.cloud.get_remote_odb(\"myremote\")\n\n    assert set(default.all()) == set()\n    assert set(myremote.all()) == {\n        \"acbd18db4cc2f85cedef654fccc4a4d8\",\n        \"f97c5d29941bfb1b2fdab0874906ab82\",\n        \"6b18131dc289fd37006705affe961ef8.dir\",\n        \"b8a9f715dbb64fd5c56e7783c6820a61\",\n    }\n\n    clean([\"foo\", \"data\"], dvc)\n\n    assert dvc.pull(remote=\"myremote\") == empty_pull | {\n        \"added\": [\"data\" + os.sep, \"foo\"],\n        \"stats\": empty_stats | {\"fetched\": 4, \"added\": 3},\n    }\n\n    assert set(dvc.cache.local.all()) == {\n        \"acbd18db4cc2f85cedef654fccc4a4d8\",\n        \"f97c5d29941bfb1b2fdab0874906ab82\",\n        \"6b18131dc289fd37006705affe961ef8.dir\",\n        \"b8a9f715dbb64fd5c56e7783c6820a61\",\n    }\n\n\ndef 
test_output_target_remote(tmp_dir, dvc, make_remote):\n    make_remote(\"default\", default=True)\n    make_remote(\"for_foo\", default=False)\n    make_remote(\"for_bar\", default=False)\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.dvc_gen(\"bar\", \"bar\")\n    tmp_dir.dvc_gen(\"data\", {\"one\": \"one\", \"two\": \"two\"})\n\n    with (tmp_dir / \"foo.dvc\").modify() as d:\n        d[\"outs\"][0][\"remote\"] = \"for_foo\"\n\n    with (tmp_dir / \"bar.dvc\").modify() as d:\n        d[\"outs\"][0][\"remote\"] = \"for_bar\"\n\n    # push foo and data to for_foo remote\n    dvc.push(remote=\"for_foo\")\n\n    default = dvc.cloud.get_remote_odb(\"default\")\n    for_foo = dvc.cloud.get_remote_odb(\"for_foo\")\n    for_bar = dvc.cloud.get_remote_odb(\"for_bar\")\n\n    # hashes for foo and data, but not bar\n    expected = {\n        \"acbd18db4cc2f85cedef654fccc4a4d8\",\n        \"f97c5d29941bfb1b2fdab0874906ab82\",\n        \"6b18131dc289fd37006705affe961ef8.dir\",\n        \"b8a9f715dbb64fd5c56e7783c6820a61\",\n    }\n\n    assert set(default.all()) == set()\n    assert set(for_foo.all()) == expected\n    assert set(for_bar.all()) == set()\n\n    # push everything without specifying remote\n    dvc.push()\n    assert set(default.all()) == {\n        \"f97c5d29941bfb1b2fdab0874906ab82\",\n        \"6b18131dc289fd37006705affe961ef8.dir\",\n        \"b8a9f715dbb64fd5c56e7783c6820a61\",\n    }\n    assert set(for_foo.all()) == expected\n    assert set(for_bar.all()) == {\"37b51d194a7513e45b56f6524f2d51f2\"}\n\n    clean([\"foo\", \"bar\", \"data\"], dvc)\n\n    # pull foo and data from for_foo remote\n    assert dvc.pull(remote=\"for_foo\", allow_missing=True) == empty_pull | {\n        \"added\": [\"data\" + os.sep, \"foo\"],\n        \"stats\": empty_stats | {\"fetched\": 4, \"added\": 3},\n    }\n\n    assert set(dvc.cache.local.all()) == expected\n\n\ndef test_pull_allow_missing(tmp_dir, dvc, local_remote):\n    dvc.stage.add(name=\"bar\", outs=[\"bar\"], 
cmd=\"echo bar > bar\")\n\n    with pytest.raises(CheckoutError) as exc:\n        dvc.pull()\n    assert exc.value.result == empty_pull | {\"failed\": [\"bar\"]}\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.push()\n    clean([\"foo\"], dvc)\n\n    stats = dvc.pull(allow_missing=True)\n    assert stats == empty_pull | {\n        \"added\": [\"foo\"],\n        \"stats\": empty_stats | {\"fetched\": 1, \"added\": 1},\n    }\n\n\ndef test_pull_granular_excluding_import_that_cannot_be_pulled(\n    tmp_dir, dvc, local_remote, mocker\n):\n    \"\"\"Regression test for https://github.com/treeverse/dvc/issues/10309.\"\"\"\n\n    mocker.patch(\"dvc.fs.dvc._DVCFileSystem\", side_effect=CloneError(\"SCM error\"))\n    (stage,) = tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    imp_stage = dvc.imp(\n        \"https://user:token@github.com/treeverse/dvc.git\",\n        \"dir\",\n        out=\"new_dir\",\n        rev=\"HEAD\",\n        no_exec=True,\n    )\n    dvc.push()\n\n    shutil.rmtree(\"dir\")\n    dvc.cache.local.clear()\n\n    assert dvc.pull(stage.addressing) == empty_pull | {\n        \"added\": [join(\"dir\", \"\")],\n        \"stats\": empty_stats | {\"added\": 2, \"fetched\": 3},\n    }\n\n    with pytest.raises(CloneError, match=\"SCM error\"):\n        dvc.pull()\n    with pytest.raises(CloneError, match=\"SCM error\"):\n        dvc.pull(imp_stage.addressing)\n\n\ndef test_loads_single_file(tmp_dir, dvc, local_remote, mocker):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.dvc_gen(\"bar\", \"bar\")\n\n    foo_dvcfile = SingleStageFile(dvc, \"foo.dvc\")\n    bar_dvcfile = SingleStageFile(dvc, \"bar.dvc\")\n\n    spy = mocker.spy(FileMixin, \"_load\")\n    assert dvc.push(\"foo.dvc\") == 1\n    spy.assert_called_with(foo_dvcfile)\n    spy.reset_mock()\n\n    assert dvc.push(\"bar.dvc\") == 1\n    spy.assert_called_with(bar_dvcfile)\n    spy.reset_mock()\n\n    dvc.cache.local.clear()\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / 
\"foo\").unlink()\n\n    assert dvc.pull(\"foo.dvc\") == {\n        \"added\": [\"foo\"],\n        \"deleted\": [],\n        \"modified\": [],\n        \"stats\": {\"added\": 1, \"deleted\": 0, \"modified\": 0, \"fetched\": 1},\n    }\n    spy.assert_called_with(foo_dvcfile)\n    assert (tmp_dir / \"foo\").exists()\n    assert not (tmp_dir / \"bar\").exists()\n    spy.reset_mock()\n\n    assert dvc.fetch(\"bar.dvc\") == 1\n    spy.assert_called_with(bar_dvcfile)\n"
  },
  {
    "path": "tests/func/test_data_status.py",
    "content": "import shutil\nfrom collections.abc import Iterable\nfrom functools import partial\nfrom os import fspath\nfrom os.path import join\nfrom typing import TYPE_CHECKING\n\nimport pytest\n\nfrom dvc.repo import Repo\nfrom dvc.repo.data import _transform_git_paths_to_dvc, posixpath_to_os_path\nfrom dvc.testing import matchers as M\nfrom dvc.testing.tmp_dir import TmpDir, make_subrepo\nfrom dvc.utils.fs import remove\n\nif TYPE_CHECKING:\n    from dvc.stage import Stage\n\nEMPTY_STATUS = {\n    \"committed\": {},\n    \"uncommitted\": {},\n    \"git\": {},\n    \"not_in_cache\": [],\n    \"not_in_remote\": [],\n    \"unchanged\": [],\n    \"untracked\": [],\n}\n\n\n@pytest.mark.parametrize(\"path\", [None, (\"sub\", \"repo\")])\ndef test_git_to_dvc_path_wdir_transformation(tmp_dir, scm, path):\n    struct = {\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}, \"file\": \"file\", \"dir2\": {}}\n    tmp_dir.gen(struct)\n\n    subdir = tmp_dir.joinpath(*path) if path else tmp_dir\n    make_subrepo(subdir, scm)\n    dvc = subdir.dvc\n\n    with subdir.chdir():\n        subdir.gen(struct)\n        _, _, untracked = scm.status(untracked_files=\"all\")\n        # make order independent of the platforms for easier test assertions\n        untracked = sorted(map(posixpath_to_os_path, untracked), reverse=True)\n        assert _transform_git_paths_to_dvc(dvc, untracked) == [\n            \"file\",\n            join(\"dir\", \"foo\"),\n            join(\"dir\", \"bar\"),\n        ]\n        with (subdir / \"dir\").chdir():\n            assert _transform_git_paths_to_dvc(dvc, untracked) == [\n                join(\"..\", \"file\"),\n                \"foo\",\n                \"bar\",\n            ]\n        with (subdir / \"dir2\").chdir():\n            assert _transform_git_paths_to_dvc(dvc, untracked) == [\n                join(\"..\", \"file\"),\n                join(\"..\", \"dir\", \"foo\"),\n                join(\"..\", \"dir\", \"bar\"),\n            ]\n\n\ndef 
test_file(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\"foo\", \"foo\", commit=\"add foo\")\n    tmp_dir.dvc_gen(\"foo\", \"foobar\")\n    remove(tmp_dir / \"foo\")\n\n    expected = {\n        **EMPTY_STATUS,\n        \"committed\": {\"modified\": [\"foo\"]},\n        \"uncommitted\": {\"deleted\": [\"foo\"]},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status() == expected\n    assert dvc.data_status(granular=True) == expected\n\n\ndef test_directory(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\"}}, commit=\"add dir\")\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\", \"foobar\": \"foobar\"}})\n    remove(tmp_dir / \"dir\")\n    (tmp_dir / \"dir\").gen({\"foo\": \"foo\", \"bar\": \"barr\", \"baz\": \"baz\"})\n    tmp_dir.gen(\"untracked\", \"untracked\")\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"modified\": [join(\"dir\", \"\")]},\n        \"uncommitted\": {\"modified\": [join(\"dir\", \"\")]},\n        \"git\": M.dict(),\n    }\n\n    assert dvc.data_status(granular=True, untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"committed\": {\n            \"added\": M.unordered(join(\"dir\", \"bar\"), join(\"dir\", \"foobar\")),\n            \"modified\": [join(\"dir\", \"\")],\n        },\n        \"uncommitted\": {\n            \"added\": [join(\"dir\", \"baz\")],\n            \"modified\": M.unordered(join(\"dir\", \"\"), join(\"dir\", \"bar\")),\n            \"deleted\": [join(\"dir\", \"foobar\")],\n        },\n        \"git\": M.dict(),\n        \"not_in_cache\": [],\n        \"unchanged\": [join(\"dir\", \"foo\")],\n        \"untracked\": [\"untracked\"],\n    }\n\n\ndef test_tracked_directory_deep(tmp_dir, dvc, scm):\n    \"\"\"Test for a directory not in cwd, but nested inside other directories.\"\"\"\n    (tmp_dir / \"sub\").gen({\"dir\": {\"foo\": \"foo\"}})\n    dvc.add(fspath(tmp_dir / \"sub\" / \"dir\"))\n    scm.add_commit([\"sub/dir.dvc\", 
\"sub/.gitignore\"], message=\"add sub/dir\")\n\n    (tmp_dir / \"sub\" / \"dir\").gen(\"bar\", \"bar\")\n    dvc.commit(None, force=True)\n    (tmp_dir / \"sub\" / \"dir\").gen(\"foobar\", \"foobar\")\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"modified\": [join(\"sub\", \"dir\", \"\")]},\n        \"uncommitted\": {\"modified\": [join(\"sub\", \"dir\", \"\")]},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True, untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"committed\": {\n            \"added\": [join(\"sub\", \"dir\", \"bar\")],\n            \"modified\": [join(\"sub\", \"dir\", \"\")],\n        },\n        \"uncommitted\": {\n            \"added\": [join(\"sub\", \"dir\", \"foobar\")],\n            \"modified\": [join(\"sub\", \"dir\", \"\")],\n        },\n        \"git\": M.dict(),\n        \"unchanged\": [join(\"sub\", \"dir\", \"foo\")],\n    }\n\n\n@pytest.mark.parametrize(\"git_repo_state\", [\"unborn\", \"committed\"])\n@pytest.mark.parametrize(\"subdir\", [True, False])\ndef test_new_dvc_repo(tmp_dir, scm, subdir, git_repo_state):\n    if git_repo_state == \"committed\":\n        tmp_dir.scm_gen(\"test\", \"test\", commit=\"init\")\n\n    is_empty = git_repo_state == \"unborn\"\n    dir_ = tmp_dir / \"sub\" if subdir else tmp_dir\n    dvc = Repo.init(dir_, subdir=subdir)\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"git\": M.dict(is_dirty=True, is_empty=is_empty),\n    }\n\n    dir_.gen(\"foo\", \"foo\")\n    dvc.add([dir_ / \"foo\"])\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"git\": M.dict(is_empty=is_empty, is_dirty=True),\n        \"committed\": {\"added\": [\"foo\"]},\n    }\n\n\ndef test_noscm_repo(tmp_dir, dvc):\n    assert dvc.data_status() == EMPTY_STATUS\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert dvc.data_status() == {**EMPTY_STATUS, \"unchanged\": [\"foo\"]}\n\n\ndef test_unchanged(tmp_dir, dvc, 
scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\"}}, commit=\"add dir\")\n    tmp_dir.dvc_gen(\"bar\", \"bar\", commit=\"add foo\")\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"git\": M.dict(),\n        \"unchanged\": M.unordered(\"bar\", join(\"dir\", \"\")),\n    }\n    assert dvc.data_status(granular=True) == {\n        **EMPTY_STATUS,\n        \"git\": M.dict(),\n        \"unchanged\": M.unordered(\"bar\", join(\"dir\", \"\"), join(\"dir\", \"foo\")),\n    }\n\n\ndef test_skip_uncached_pipeline_outputs(tmp_dir, dvc, run_copy_metrics):\n    tmp_dir.gen({\"m_temp.yaml\": str(5)})\n    run_copy_metrics(\n        \"m_temp.yaml\",\n        \"m.yaml\",\n        metrics_no_cache=[\"m.yaml\"],\n        name=\"copy-metrics\",\n    )\n    assert dvc.data_status() == EMPTY_STATUS\n    assert dvc.data_status(granular=True, untracked_files=\"all\") == EMPTY_STATUS\n\n\ndef test_outs_with_no_hashes(tmp_dir, dvc, scm):\n    dvc.stage.add(single_stage=True, outs=[\"bar\"])\n    dvc.stage.add(deps=[\"bar\"], outs=[\"foo\"], name=\"copy\", cmd=\"cp foo bar\")\n\n    expected_output = {**EMPTY_STATUS, \"git\": M.dict()}\n    assert dvc.data_status() == expected_output\n    assert dvc.data_status(granular=True) == expected_output\n\n\ndef test_outs_with_no_hashes_and_with_uncommitted_files(tmp_dir, dvc, scm):\n    tmp_dir.gen({\"bar\": \"bar\", \"foo\": \"foo\"})\n    dvc.stage.add(single_stage=True, outs=[\"bar\"])\n    dvc.stage.add(deps=[\"bar\"], outs=[\"foo\"], name=\"copy\", cmd=\"cp foo bar\")\n\n    expected_output = {\n        **EMPTY_STATUS,\n        \"uncommitted\": {\"added\": M.unordered(\"bar\", \"foo\")},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status() == expected_output\n    assert dvc.data_status(granular=True) == expected_output\n\n\ndef test_subdir(tmp_dir, scm):\n    subrepo = tmp_dir / \"sub\"\n    make_subrepo(subrepo, scm)\n\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"dir\": {\"foo\": \"foo\"}}, 
commit=\"add dir\")\n        subrepo.dvc_gen(\"bar\", \"bar\", commit=\"add foo\")\n        subrepo.gen(\"untracked\", \"untracked\")\n\n        dvc = subrepo.dvc\n        assert dvc.data_status(granular=True, untracked_files=\"all\") == {\n            **EMPTY_STATUS,\n            \"git\": M.dict(),\n            \"unchanged\": M.unordered(\"bar\", join(\"dir\", \"\"), join(\"dir\", \"foo\")),\n            \"untracked\": [\"untracked\"],\n        }\n\n\ndef test_untracked_newly_added_files(tmp_dir, dvc, scm):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.gen(\"foobar\", \"foobar\")\n\n    expected = {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(join(\"dir\", \"foo\"), join(\"dir\", \"bar\"), \"foobar\"),\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(untracked_files=\"all\") == expected\n    assert dvc.data_status(granular=True, untracked_files=\"all\") == expected\n\n\ndef test_missing_cache_workspace_exists(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.dvc_gen(\"foobar\", \"foobar\")\n    remove(dvc.cache.repo.path)\n\n    assert dvc.data_status(untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n    }\n\n    assert dvc.data_status(granular=True, untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"uncommitted\": {\"unknown\": M.unordered(join(\"dir\", \"foo\"), join(\"dir\", \"bar\"))},\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n    }\n\n\ndef 
test_missing_cache_missing_workspace(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.dvc_gen(\"foobar\", \"foobar\")\n    for path in [dvc.cache.repo.path, \"dir\", \"foobar\"]:\n        remove(path)\n\n    assert dvc.data_status(untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"uncommitted\": {\"deleted\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n    }\n\n    assert dvc.data_status(granular=True, untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"uncommitted\": {\"deleted\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n    }\n\n\ndef test_git_committed_missing_cache_workspace_exists(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, commit=\"add dir\")\n    tmp_dir.dvc_gen(\"foobar\", \"foobar\", commit=\"add foobar\")\n    remove(dvc.cache.local.path)\n\n    assert dvc.data_status(untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n        \"unchanged\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n    }\n    assert dvc.data_status(granular=True) == {\n        **EMPTY_STATUS,\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"uncommitted\": {\"unknown\": M.unordered(join(\"dir\", \"foo\"), join(\"dir\", \"bar\"))},\n        \"git\": M.dict(),\n        \"unchanged\": 
M.unordered(\"foobar\", join(\"dir\", \"\")),\n    }\n\n\ndef test_git_committed_missing_cache_missing_workspace(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, commit=\"add dir\")\n    tmp_dir.dvc_gen(\"foobar\", \"foobar\", commit=\"add foobar\")\n    for path in [dvc.cache.repo.path, \"dir\", \"foobar\"]:\n        remove(path)\n\n    assert dvc.data_status(untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"uncommitted\": {\"deleted\": M.unordered(join(\"dir\", \"\"), \"foobar\")},\n        \"not_in_cache\": M.unordered(join(\"dir\", \"\"), \"foobar\"),\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True, untracked_files=\"all\") == {\n        **EMPTY_STATUS,\n        \"committed\": {},\n        \"uncommitted\": {\"deleted\": M.unordered(join(\"dir\", \"\"), \"foobar\")},\n        \"not_in_cache\": M.unordered(join(\"dir\", \"\"), \"foobar\"),\n        \"git\": M.dict(),\n    }\n\n\ndef test_partial_missing_cache(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    # remove \"foo\" from cache\n    odb = dvc.cache.repo\n    odb.fs.rm(odb.oid_to_path(\"acbd18db4cc2f85cedef654fccc4a4d8\"))\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [join(\"dir\", \"\")]},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\n            \"added\": M.unordered(\n                join(\"dir\", \"\"), join(\"dir\", \"foo\"), join(\"dir\", \"bar\")\n            )\n        },\n        \"not_in_cache\": [join(\"dir\", \"foo\")],\n        \"git\": M.dict(),\n    }\n\n\ndef test_missing_dir_object_from_head(tmp_dir, dvc, scm):\n    (stage,) = tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, commit=\"add dir\")\n    remove(\"dir\")\n    tmp_dir.dvc_gen({\"dir\": {\"foobar\": \"foobar\"}})\n    odb = dvc.cache.repo\n    
odb.fs.rm(odb.oid_to_path(stage.outs[0].hash_info.value))\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"modified\": [join(\"dir\", \"\")]},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\n            \"modified\": [join(\"dir\", \"\")],\n            \"unknown\": [join(\"dir\", \"foobar\")],\n        },\n        \"git\": M.dict(),\n    }\n\n\ndef test_missing_dir_object_from_index(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, commit=\"add dir\")\n    remove(\"dir\")\n    (stage,) = tmp_dir.dvc_gen({\"dir\": {\"foobar\": \"foobar\"}})\n    odb = dvc.cache.repo\n    odb.fs.rm(odb.oid_to_path(stage.outs[0].hash_info.value))\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"modified\": [join(\"dir\", \"\")]},\n        \"not_in_cache\": [join(\"dir\", \"\")],\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\"modified\": [join(\"dir\", \"\")]},\n        \"uncommitted\": {\"unknown\": [join(\"dir\", \"foobar\")]},\n        \"not_in_cache\": [join(\"dir\", \"\")],\n        \"git\": M.dict(),\n    }\n\n\ndef test_remote_check(tmp_dir, dvc, scm, make_remote):\n    make_remote(\"default\", default=True)\n    make_remote(\"myremote\", default=False)\n\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.dvc_gen(\"foobar\", \"foobar\")\n    assert dvc.push() == 4\n    scm.add_commit([\"dir.dvc\", \"foobar.dvc\", \".gitignore\"], message=\"add files\")\n\n    entries = M.unordered(\"foobar\", join(\"dir\", \"\"))\n    granular_entries = M.unordered(\n        \"foobar\", join(\"dir\", \"\"), join(\"dir\", \"foo\"), join(\"dir\", \"bar\")\n    )\n    expected_ng = EMPTY_STATUS | {\"git\": M.dict(), \"unchanged\": entries}\n    expected_g = EMPTY_STATUS | {\n   
     \"not_in_remote\": [],\n        \"git\": M.dict(),\n        \"unchanged\": granular_entries,\n    }\n\n    opts = {\"not_in_remote\": True, \"remote_refresh\": True}\n    assert dvc.data_status(**opts) == expected_ng\n    assert dvc.data_status(granular=True, **opts) == expected_g\n\n    opts |= {\"remote\": \"myremote\"}\n    assert dvc.data_status(**opts) == expected_ng | {\"not_in_remote\": entries}\n    assert dvc.data_status(granular=True, **opts) == expected_g | {\n        \"not_in_remote\": granular_entries\n    }\n\n    dvc.push(remote=\"myremote\")\n\n    assert dvc.data_status(**opts) == expected_ng\n    assert dvc.data_status(granular=True, **opts) == expected_g\n\n\ndef test_missing_remote_cache(tmp_dir, dvc, scm, local_remote):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.dvc_gen(\"foobar\", \"foobar\")\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"git\": M.dict(),\n    }\n\n    assert dvc.data_status(untracked_files=\"all\", not_in_remote=True) == {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"not_in_remote\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n    }\n\n    assert dvc.data_status(\n        granular=True, untracked_files=\"all\", not_in_remote=True\n    ) == {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"committed\": {\n            \"added\": M.unordered(\n                \"foobar\",\n                join(\"dir\", \"\"),\n                join(\"dir\", \"foo\"),\n                join(\"dir\", \"bar\"),\n            )\n        },\n        \"uncommitted\": {},\n        \"not_in_remote\": M.unordered(\n            \"foobar\",\n            
join(\"dir\", \"\"),\n            join(\"dir\", \"foo\"),\n            join(\"dir\", \"bar\"),\n        ),\n        \"git\": M.dict(),\n    }\n\n\ndef test_not_in_remote_respects_not_pushable(\n    tmp_dir: TmpDir, dvc: Repo, scm, mocker, local_remote\n):\n    stages: list[Stage] = tmp_dir.dvc_gen({\"foo\": \"foo\", \"dir\": {\"foobar\": \"foobar\"}})\n    # Make foo not pushable\n    stages[0].outs[0].can_push = False\n    stages[0].dump()\n\n    def assert_not_in_remote_is(\n        granular: bool, not_in_remote: list[str], committed: list[str]\n    ):\n        assert dvc.data_status(\n            granular=granular, remote_refresh=True, not_in_remote=True\n        ) == {\n            **EMPTY_STATUS,\n            \"git\": M.dict(),\n            \"not_in_remote\": M.unordered(*not_in_remote),\n            \"committed\": {\"added\": M.unordered(*committed)},\n        }\n\n    foo = \"foo\"\n    dir_ = join(\"dir\", \"\")\n    foobar = join(\"dir\", \"foobar\")\n\n    assert_not_in_remote_is(\n        granular=True,\n        not_in_remote=[dir_, foobar],\n        committed=[foo, dir_, foobar],\n    )\n    assert_not_in_remote_is(granular=False, not_in_remote=[dir_], committed=[foo, dir_])\n\n    dvc.push()\n\n    assert_not_in_remote_is(\n        granular=True,\n        not_in_remote=[],\n        committed=[foo, dir_, foobar],\n    )\n    assert_not_in_remote_is(\n        granular=False,\n        not_in_remote=[],\n        committed=[foo, dir_],\n    )\n\n\ndef test_root_from_dir_to_file(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    remove(\"data\")\n    tmp_dir.gen(\"data\", \"file\")\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [join(\"data\", \"\")]},\n        \"uncommitted\": {\"modified\": [\"data\"]},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\n            \"added\": 
M.unordered(\n                join(\"data\", \"\"), join(\"data\", \"foo\"), join(\"data\", \"bar\")\n            )\n        },\n        \"uncommitted\": {\n            \"deleted\": M.unordered(join(\"data\", \"foo\"), join(\"data\", \"bar\")),\n            \"modified\": [\"data\"],\n        },\n        \"git\": M.dict(),\n    }\n\n\ndef test_root_from_file_to_dir(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\"data\", \"file\")\n    remove(\"data\")\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [\"data\"]},\n        \"uncommitted\": {\"modified\": [join(\"data\", \"\")]},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [\"data\"]},\n        \"uncommitted\": {\n            \"modified\": [join(\"data\", \"\")],\n            \"added\": M.unordered(join(\"data\", \"foo\"), join(\"data\", \"bar\")),\n        },\n        \"git\": M.dict(),\n    }\n\n\ndef test_empty_dir(tmp_dir, scm, dvc):\n    # regression testing for https://github.com/treeverse/dvc/issues/8958\n    tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\"}})\n    remove(\"data\")\n\n    (tmp_dir / \"data\").mkdir()\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [join(\"data\", \"\")]},\n        \"uncommitted\": {\"modified\": [join(\"data\", \"\")]},\n        \"git\": M.dict(),\n    }\n\n\ndef test_untracked_files_filter_targets(tmp_dir, scm, dvc):\n    tmp_dir.gen(\n        {\"spam\": \"spam\", \"ham\": \"ham\", \"dir\": {\"eggs\": \"eggs\", \"bacon\": \"bacon\"}}\n    )\n    _default = EMPTY_STATUS | {\"git\": M.dict()}\n    status = partial(dvc.data_status, untracked_files=\"all\")\n\n    assert status([\"not-existing\"]) == _default\n\n    assert status([\"spam\"]) == _default | {\"untracked\": [\"spam\"]}\n    assert status([\"spam\", \"ham\"]) == 
_default | {\n        \"untracked\": M.unordered(\"spam\", \"ham\")\n    }\n    assert status([\"dir\"]) == _default | {\n        \"untracked\": M.unordered(join(\"dir\", \"eggs\"), join(\"dir\", \"bacon\")),\n    }\n    assert status([join(\"dir\", \"\")]) == _default | {\n        \"untracked\": M.unordered(join(\"dir\", \"eggs\"), join(\"dir\", \"bacon\")),\n    }\n    assert status([join(\"dir\", \"bacon\")]) == _default | {\n        \"untracked\": [join(\"dir\", \"bacon\")]\n    }\n\n\ndef param(*values):\n    \"\"\"Uses test id from the first value.\"\"\"\n    first = values[0]\n    _id = (\n        \",\".join(first)\n        if isinstance(first, Iterable) and not isinstance(first, str)\n        else first\n    )\n    return pytest.param(*values, id=_id)\n\n\n@pytest.mark.parametrize(\n    \"targets,expected\",\n    [\n        param(\n            [\"foo\"],\n            {\"committed\": {\"added\": [\"foo\"]}, \"uncommitted\": {\"deleted\": [\"foo\"]}},\n        ),\n        param(\n            [\"bar\"],\n            {\"committed\": {\"added\": [\"bar\"]}, \"uncommitted\": {\"modified\": [\"bar\"]}},\n        ),\n        param([\"foobar\"], {\"committed\": {\"added\": [\"foobar\"]}, \"uncommitted\": {}}),\n        param([\"not-existing\"], {}),\n        param([\"baz\"], {\"untracked\": [\"baz\"]}),\n        param(\n            [\"foo\", \"foobar\"],\n            {\n                \"committed\": {\"added\": M.unordered(\"foo\", \"foobar\")},\n                \"uncommitted\": {\"deleted\": [\"foo\"]},\n            },\n        ),\n    ],\n)\ndef test_filter_targets_files_after_dvc_commit(tmp_dir, dvc, scm, targets, expected):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\", \"foobar\": \"foobar\"})\n    (tmp_dir / \"foo\").unlink()  # deleted\n    tmp_dir.gen({\"bar\": \"bar modified\", \"baz\": \"baz new\"})\n\n    assert dvc.data_status(\n        targets=targets, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected | {\"git\": M.dict()}\n    
assert dvc.data_status(\n        targets=targets, granular=True, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected | {\"git\": M.dict()}\n\n\n@pytest.mark.parametrize(\n    \"targets,expected\",\n    [\n        param([\"not-existing\"], {}),\n        param([\"foo\"], {\"uncommitted\": {\"deleted\": [\"foo\"]}}),\n        param([\"bar\"], {\"unchanged\": [\"bar\"]}),\n        param([\"baz\"], {\"unchanged\": [\"baz\"]}),\n        param([\"foobar\"], {\"unchanged\": [\"foobar\"]}),\n        param(\n            (\"foo\", \"foobar\"),\n            {\"unchanged\": [\"foobar\"], \"uncommitted\": {\"deleted\": [\"foo\"]}},\n        ),\n    ],\n)\ndef test_filter_targets_after_git_commit(tmp_dir, dvc, scm, targets, expected):\n    tmp_dir.dvc_gen(\n        {\"foo\": \"foo\", \"bar\": \"bar\", \"foobar\": \"foobar\", \"baz\": \"baz\"},\n        commit=\"add files\",\n    )\n    (tmp_dir / \"foo\").unlink()  # deleted\n\n    assert dvc.data_status(\n        targets=targets, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected | {\"git\": M.dict()}\n    assert dvc.data_status(\n        targets=targets, granular=True, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected | {\"git\": M.dict()}\n\n\ndef with_aliases(values, aliases):\n    \"\"\"Generate test cases by reusing values for given aliases from existing ones.\"\"\"\n    for value in values:\n        targets = value[0]\n        assert isinstance(targets, tuple)\n        yield param(*value)\n    yield from (\n        param(alias, *rest)\n        for alias, to in aliases.items()\n        for targets, *rest in values\n        if to == targets\n    )\n\n\n@pytest.mark.parametrize(\n    \"targets,expected_ng,expected_g\",\n    with_aliases(\n        [\n            (\n                (\"dir\",),\n                {\n                    \"committed\": {\"added\": [join(\"dir\", \"\")]},\n                    \"uncommitted\": {\"modified\": [join(\"dir\", \"\")]},\n                },\n                {\n          
          \"committed\": {\n                        \"added\": M.unordered(\n                            join(\"dir\", \"\"),\n                            join(\"dir\", \"foo\"),\n                            join(\"dir\", \"sub\", \"bar\"),\n                            join(\"dir\", \"foobar\"),\n                        )\n                    },\n                    \"uncommitted\": {\n                        \"added\": [join(\"dir\", \"baz\")],\n                        \"modified\": [join(\"dir\", \"\"), join(\"dir\", \"sub\", \"bar\")],\n                        \"deleted\": [join(\"dir\", \"foo\")],\n                    },\n                },\n            ),\n            (\n                (join(\"dir\", \"foo\"),),\n                {},\n                {\n                    \"committed\": {\"added\": [join(\"dir\", \"foo\")]},\n                    \"uncommitted\": {\"deleted\": [join(\"dir\", \"foo\")]},\n                },\n            ),\n            (\n                (join(\"dir\", \"baz\"),),\n                {},\n                {\n                    \"uncommitted\": {\"added\": [join(\"dir\", \"baz\")]},\n                },\n            ),\n            (\n                (join(\"dir\", \"sub\"),),\n                {},\n                {\n                    \"committed\": {\"added\": [join(\"dir\", \"sub\", \"bar\")]},\n                    \"uncommitted\": {\"modified\": [join(\"dir\", \"sub\", \"bar\")]},\n                },\n            ),\n            (\n                (join(\"dir\", \"sub\", \"bar\"),),\n                {},\n                {\n                    \"committed\": {\"added\": [join(\"dir\", \"sub\", \"bar\")]},\n                    \"uncommitted\": {\"modified\": [join(\"dir\", \"sub\", \"bar\")]},\n                },\n            ),\n            (\n                (join(\"dir\", \"foobar\"),),\n                {},\n                {\n                    \"committed\": {\"added\": [join(\"dir\", \"foobar\")]},\n                    
\"uncommitted\": {},\n                },\n            ),\n            ((join(\"dir\", \"not-existing-file\"),), {}, {}),\n            ((join(\"dir\", \"not-existing-dir\", \"\"),), {}, {}),\n            ((join(\"dir\", \"sub\", \"not-existing-file\"),), {}, {}),\n            (\n                (join(\"dir\", \"foo\"), join(\"dir\", \"foobar\")),\n                {},\n                {\n                    \"committed\": {\n                        \"added\": M.unordered(join(\"dir\", \"foo\"), join(\"dir\", \"foobar\"))\n                    },\n                    \"uncommitted\": {\"deleted\": [join(\"dir\", \"foo\")]},\n                },\n            ),\n        ],\n        {\n            # the values for these are used from above test cases\n            (join(\"dir\", \"\"),): (\"dir\",),\n            (join(\"dir\", \"sub\", \"\"),): (join(\"dir\", \"sub\"),),\n            (join(\"dir\", \"\"), join(\"dir\", \"foo\")): (\"dir\",),\n        },\n    ),\n)\ndef test_filter_targets_inside_directory_after_dvc_commit(\n    tmp_dir, dvc, scm, targets, expected_ng, expected_g\n):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"sub\": {\"bar\": \"bar\"}, \"foobar\": \"foobar\"}})\n    (tmp_dir / \"dir\" / \"foo\").unlink()  # deleted\n    (tmp_dir / \"dir\" / \"sub\" / \"bar\").write_text(\"bar modified\")\n    (tmp_dir / \"dir\" / \"baz\").write_text(\"baz new\")\n\n    assert dvc.data_status(\n        targets=targets, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected_ng | {\"git\": M.dict()}\n    assert dvc.data_status(\n        targets=targets, granular=True, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected_g | {\"git\": M.dict()}\n\n\n@pytest.mark.parametrize(\n    \"targets,expected_ng,expected_g\",\n    with_aliases(\n        [\n            (\n                (join(\"dir\", \"foo\"),),\n                {},\n                {\"committed\": {\"deleted\": [join(\"dir\", \"foo\")]}},\n            ),\n            ((join(\"dir\", \"baz\"),), {}, 
{\"committed\": {\"added\": [join(\"dir\", \"baz\")]}}),\n            ((join(\"dir\", \"foobar\"),), {}, {\"unchanged\": [join(\"dir\", \"foobar\")]}),\n            (\n                (\"dir\",),\n                {\"committed\": {\"modified\": [join(\"dir\", \"\")]}},\n                {\n                    \"unchanged\": [join(\"dir\", \"foobar\")],\n                    \"committed\": {\n                        \"added\": [join(\"dir\", \"baz\")],\n                        \"modified\": M.unordered(\n                            join(\"dir\", \"\"), join(\"dir\", \"sub\", \"bar\")\n                        ),\n                        \"deleted\": [join(\"dir\", \"foo\")],\n                    },\n                },\n            ),\n            (\n                (join(\"dir\", \"sub\"),),\n                {},\n                {\"committed\": {\"modified\": [join(\"dir\", \"sub\", \"bar\")]}},\n            ),\n            (\n                (join(\"dir\", \"sub\", \"bar\"),),\n                {},\n                {\"committed\": {\"modified\": [join(\"dir\", \"sub\", \"bar\")]}},\n            ),\n            (\n                (join(\"dir\", \"foo\"), join(\"dir\", \"foobar\")),\n                {},\n                {\n                    \"unchanged\": [join(\"dir\", \"foobar\")],\n                    \"committed\": {\"deleted\": [join(\"dir\", \"foo\")]},\n                },\n            ),\n        ],\n        {\n            (join(\"dir\", \"\"),): (\"dir\",),\n            (join(\"dir\", \"sub\", \"\"),): (join(\"dir\", \"sub\"),),\n            (join(\"dir\", \"\"), join(\"dir\", \"foo\")): (\"dir\",),\n        },\n    ),\n)\ndef test_filter_targets_inside_directory_after_git_commit(\n    tmp_dir, dvc, scm, targets, expected_ng, expected_g\n):\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"foo\": \"foo\", \"sub\": {\"bar\": \"bar\"}, \"foobar\": \"foobar\"}},\n        commit=\"add dir\",\n    )\n    (tmp_dir / \"dir\" / \"foo\").unlink()  # deleted\n    (tmp_dir / 
\"dir\" / \"sub\" / \"bar\").write_text(\"bar modified\")\n    (tmp_dir / \"dir\" / \"baz\").write_text(\"baz new\")\n    dvc.add([\"dir\"])\n\n    assert dvc.data_status(\n        targets=targets, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected_ng | {\"git\": M.dict()}\n    assert dvc.data_status(\n        targets=targets, granular=True, untracked_files=\"all\"\n    ) == EMPTY_STATUS | expected_g | {\"git\": M.dict()}\n\n\n@pytest.mark.parametrize(\"to_check\", [\"remote\", \"cache\"])\n@pytest.mark.parametrize(\n    \"targets, non_granular, granular\",\n    [\n        param((\"foo\",), [\"foo\"], [\"foo\"]),\n        param((\"dir\",), [join(\"dir\", \"\")], [join(\"dir\", \"\"), join(\"dir\", \"bar\")]),\n        param(\n            (join(\"dir\", \"\"),), [join(\"dir\", \"\")], [join(\"dir\", \"\"), join(\"dir\", \"bar\")]\n        ),\n        param((join(\"dir\", \"bar\"),), [], [join(\"dir\", \"bar\")]),\n        param(\n            (join(\"dir\", \"bar\"), \"foo\"),\n            [\"foo\"],\n            M.unordered(join(\"dir\", \"bar\"), \"foo\"),\n        ),\n        param(\n            (join(\"dir\", \"bar\"), \"dir\"),\n            [join(\"dir\", \"\")],\n            M.unordered(join(\"dir\", \"\"), join(\"dir\", \"bar\")),\n        ),\n        param(\n            (\"dir\", \"foo\"),\n            M.unordered(join(\"dir\", \"\"), \"foo\"),\n            M.unordered(join(\"dir\", \"\"), join(\"dir\", \"bar\"), \"foo\"),\n        ),\n    ],\n)\ndef test_filter_targets_not_in_cache(\n    local_remote, tmp_dir, scm, dvc, to_check, targets, non_granular, granular\n):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"dir\": {\"bar\": \"bar\"}})\n\n    if to_check == \"cache\":\n        dvc.push()\n        dvc.cache.local.clear()\n\n    not_in_remote = to_check == \"remote\"\n    key = \"not_in_\" + to_check\n    d = EMPTY_STATUS | {\"git\": M.dict(), \"committed\": M.dict()}\n    assert dvc.data_status(targets, not_in_remote=not_in_remote) == d | {\n        key: 
non_granular\n    }\n    assert dvc.data_status(targets, granular=True, not_in_remote=not_in_remote) == d | {\n        key: granular\n    }\n\n\ndef test_compat_legacy_new_cache_types(tmp_dir, dvc, scm):\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    (tmp_dir / \"foo.dvc\").dump(\n        {\n            \"outs\": [\n                {\"path\": \"foo\", \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\", \"size\": 3},\n            ]\n        }\n    )\n    dvc.add(tmp_dir / \"bar\", no_commit=True)\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"not_in_cache\": M.unordered(\"foo\", \"bar\"),\n        \"committed\": {\"added\": M.unordered(\"foo\", \"bar\")},\n        \"git\": M.dict(),\n    }\n\n    dvc.commit(\"foo\")\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"not_in_cache\": [\"bar\"],\n        \"committed\": {\"added\": M.unordered(\"foo\", \"bar\")},\n        \"git\": M.dict(),\n    }\n\n    dvc.commit(\"bar\")\n\n    assert dvc.data_status() == {\n        **EMPTY_STATUS,\n        \"not_in_cache\": [],\n        \"committed\": {\"added\": M.unordered(\"foo\", \"bar\")},\n        \"git\": M.dict(),\n    }\n\n\ndef test_missing_cache_remote_check(tmp_dir, dvc, scm, local_remote):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.dvc_gen(\"foobar\", \"foobar\")\n    remove(dvc.cache.repo.path)\n\n    assert dvc.data_status(untracked_files=\"all\", not_in_remote=True) == {\n        **EMPTY_STATUS,\n        \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n        \"not_in_remote\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n    }\n\n    assert dvc.data_status(\n        granular=True, untracked_files=\"all\", not_in_remote=True\n    ) == {\n        **EMPTY_STATUS,\n   
     \"untracked\": M.unordered(\"foobar.dvc\", \"dir.dvc\", \".gitignore\"),\n        \"committed\": {\"added\": M.unordered(\"foobar\", join(\"dir\", \"\"))},\n        \"uncommitted\": {\"unknown\": M.unordered(join(\"dir\", \"foo\"), join(\"dir\", \"bar\"))},\n        \"not_in_cache\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n        \"git\": M.dict(),\n        \"not_in_remote\": M.unordered(\"foobar\", join(\"dir\", \"\")),\n    }\n\n    assert dvc.data_status([\"dir\"], untracked_files=\"all\", not_in_remote=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [join(\"dir\", \"\")]},\n        \"not_in_cache\": [join(\"dir\", \"\")],\n        \"git\": M.dict(),\n        \"not_in_remote\": [join(\"dir\", \"\")],\n    }\n\n    assert dvc.data_status(\n        [\"dir\"], untracked_files=\"all\", not_in_remote=True, granular=True\n    ) == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [join(\"dir\", \"\")]},\n        \"uncommitted\": {\"unknown\": M.unordered(join(\"dir\", \"foo\"), join(\"dir\", \"bar\"))},\n        \"not_in_cache\": [join(\"dir\", \"\")],\n        \"git\": M.dict(),\n        \"not_in_remote\": [join(\"dir\", \"\")],\n    }\n\n\n@pytest.mark.parametrize(\n    \"targets,expected_non_granular,expected_granular\",\n    [\n        param(\n            None,\n            {\n                \"committed\": {\n                    \"renamed\": [\n                        {\"old\": join(\"dir\", \"\"), \"new\": join(\"dir2\", \"\")},\n                        {\"old\": \"file\", \"new\": \"file2\"},\n                    ],\n                },\n                \"uncommitted\": {\"modified\": [join(\"dir2\", \"\")]},\n            },\n            {\n                \"committed\": {\n                    \"renamed\": [\n                        {\"old\": join(\"dir\", \"\"), \"new\": join(\"dir2\", \"\")},\n                        {\"old\": join(\"dir\", \"bar\"), \"new\": join(\"dir2\", \"bar\")},\n                        
{\"old\": join(\"dir\", \"foo\"), \"new\": join(\"dir2\", \"foo\")},\n                        {\"old\": \"file\", \"new\": \"file2\"},\n                    ],\n                },\n                \"uncommitted\": {\n                    \"modified\": [join(\"dir2\", \"\")],\n                    \"renamed\": [\n                        {\"old\": join(\"dir2\", \"foo\"), \"new\": join(\"dir2\", \"foobar\")}\n                    ],\n                },\n            },\n        ),\n        param(\n            [\"dir\"],\n            {\"committed\": {\"deleted\": [join(\"dir\", \"\")]}},\n            {\n                \"committed\": {\n                    \"deleted\": M.unordered(\n                        join(\"dir\", \"\"), join(\"dir\", \"bar\"), join(\"dir\", \"foo\")\n                    )\n                }\n            },\n        ),\n        param(\n            [\"dir2\"],\n            {\n                \"committed\": {\"added\": [join(\"dir2\", \"\")]},\n                \"uncommitted\": {\"modified\": [join(\"dir2\", \"\")]},\n            },\n            {\n                \"committed\": {\n                    \"added\": M.unordered(\n                        join(\"dir2\", \"\"),\n                        join(\"dir2\", \"bar\"),\n                        join(\"dir2\", \"foo\"),\n                    ),\n                },\n                \"uncommitted\": {\n                    \"modified\": [join(\"dir2\", \"\")],\n                    \"renamed\": [\n                        {\"old\": join(\"dir2\", \"foo\"), \"new\": join(\"dir2\", \"foobar\")}\n                    ],\n                },\n            },\n        ),\n        param(\n            [join(\"dir\", \"bar\")], {}, {\"committed\": {\"deleted\": [join(\"dir\", \"bar\")]}}\n        ),\n        param(\n            [join(\"dir\", \"foo\")], {}, {\"committed\": {\"deleted\": [join(\"dir\", \"foo\")]}}\n        ),\n        param(\n            [join(\"dir2\", \"bar\")], {}, {\"committed\": {\"added\": 
[join(\"dir2\", \"bar\")]}}\n        ),\n        param(\n            [join(\"dir2\", \"foobar\")],\n            {},\n            {\"uncommitted\": {\"added\": [join(\"dir2\", \"foobar\")]}},\n        ),\n        param(\n            [\"file\"],\n            {\"committed\": {\"deleted\": [\"file\"]}},\n            {\"committed\": {\"deleted\": [\"file\"]}},\n        ),\n        param(\n            [\"file2\"],\n            {\"committed\": {\"added\": [\"file2\"]}},\n            {\"committed\": {\"added\": [\"file2\"]}},\n        ),\n        param(\n            [\"dir\", \"dir2\"],\n            {\n                \"committed\": {\n                    \"renamed\": [{\"old\": join(\"dir\", \"\"), \"new\": join(\"dir2\", \"\")}],\n                },\n                \"uncommitted\": {\"modified\": [join(\"dir2\", \"\")]},\n            },\n            {\n                \"committed\": {\n                    \"renamed\": [\n                        {\"old\": join(\"dir\", \"\"), \"new\": join(\"dir2\", \"\")},\n                        {\"old\": join(\"dir\", \"bar\"), \"new\": join(\"dir2\", \"bar\")},\n                        {\"old\": join(\"dir\", \"foo\"), \"new\": join(\"dir2\", \"foo\")},\n                    ]\n                },\n                \"uncommitted\": {\n                    \"modified\": [join(\"dir2\", \"\")],\n                    \"renamed\": [\n                        {\"old\": join(\"dir2\", \"foo\"), \"new\": join(\"dir2\", \"foobar\")}\n                    ],\n                },\n            },\n        ),\n        param(\n            [\"file\", \"file2\"],\n            {\"committed\": {\"renamed\": [{\"old\": \"file\", \"new\": \"file2\"}]}},\n            {\"committed\": {\"renamed\": [{\"old\": \"file\", \"new\": \"file2\"}]}},\n        ),\n        param(\n            [join(\"dir\", \"foo\"), join(\"dir2\", \"foobar\")],\n            {},\n            {\n                \"committed\": {\"deleted\": [join(\"dir\", \"foo\")]},\n                
\"uncommitted\": {\"added\": [join(\"dir2\", \"foobar\")]},\n            },\n        ),\n        param(\n            [join(\"dir2\", \"foo\"), join(\"dir2\", \"foobar\")],\n            {},\n            {\n                \"committed\": {\"added\": [join(\"dir2\", \"foo\")]},\n                \"uncommitted\": {\n                    \"renamed\": [\n                        {\"old\": join(\"dir2\", \"foo\"), \"new\": join(\"dir2\", \"foobar\")}\n                    ],\n                },\n            },\n        ),\n        param(\n            [\"dir2\", \"file2\"],\n            {\n                \"committed\": {\"added\": M.unordered(join(\"dir2\", \"\"), \"file2\")},\n                \"uncommitted\": {\"modified\": [join(\"dir2\", \"\")]},\n            },\n            {\n                \"committed\": {\n                    \"added\": M.unordered(\n                        join(\"dir2\", \"\"),\n                        join(\"dir2\", \"bar\"),\n                        join(\"dir2\", \"foo\"),\n                        \"file2\",\n                    )\n                },\n                \"uncommitted\": {\n                    \"modified\": [join(\"dir2\", \"\")],\n                    \"renamed\": [\n                        {\"old\": join(\"dir2\", \"foo\"), \"new\": join(\"dir2\", \"foobar\")}\n                    ],\n                },\n            },\n        ),\n        param(\n            [\"dir2\", join(\"dir2\", \"foobar\"), \"file\"],\n            {\n                \"uncommitted\": {\"modified\": [join(\"dir2\", \"\")]},\n                \"committed\": {\"added\": [join(\"dir2\", \"\")], \"deleted\": [\"file\"]},\n            },\n            {\n                \"committed\": {\n                    \"added\": M.unordered(\n                        join(\"dir2\", \"\"), join(\"dir2\", \"bar\"), join(\"dir2\", \"foo\")\n                    ),\n                    \"deleted\": [\"file\"],\n                },\n                \"uncommitted\": {\n                    
\"modified\": [join(\"dir2\", \"\")],\n                    \"renamed\": [\n                        {\"old\": join(\"dir2\", \"foo\"), \"new\": join(\"dir2\", \"foobar\")}\n                    ],\n                },\n            },\n        ),\n    ],\n)\ndef test_renames(tmp_dir, scm, dvc, targets, expected_non_granular, expected_granular):\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}, \"file\": \"file\"}, commit=\"add dir and file\"\n    )\n    dvc.move(\"dir\", \"dir2\")\n    dvc.move(\"file\", \"file2\")\n    shutil.move(tmp_dir / \"dir2\" / \"foo\", tmp_dir / \"dir2\" / \"foobar\")\n\n    assert (\n        dvc.data_status(with_renames=True, targets=targets)\n        == EMPTY_STATUS | {\"git\": M.dict()} | expected_non_granular\n    )\n    assert (\n        dvc.data_status(granular=True, with_renames=True, targets=targets)\n        == EMPTY_STATUS | {\"git\": M.dict()} | expected_granular\n    )\n\n\ndef test_shallow_should_iterate_upto_tracked_directory(tmp_dir, dvc, scm, local_remote):\n    \"\"\"Testing regression for https://github.com/treeverse/dvc/issues/10899.\"\"\"\n\n    tmp_dir.scm_gen({\"dir\": {\".gitkeep\": \"\"}}, commit=\"mk dir\")\n    dataset_root = join(\"dir\", \"data\", \"\")\n    tmp_dir.dvc_gen({dataset_root: {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    files = [dataset_root, join(dataset_root, \"foo\"), join(dataset_root, \"bar\")]\n\n    assert dvc.data_status(not_in_remote=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": [dataset_root]},\n        \"not_in_remote\": [dataset_root],\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True, not_in_remote=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": M.unordered(*files)},\n        \"not_in_remote\": M.unordered(*files),\n        \"git\": M.dict(),\n    }\n\n    assert dvc.push() == 3\n\n    assert dvc.data_status(not_in_remote=True, remote_refresh=True) == {\n        **EMPTY_STATUS,\n        
\"committed\": {\"added\": [dataset_root]},\n        \"git\": M.dict(),\n    }\n    assert dvc.data_status(granular=True, not_in_remote=True, remote_refresh=True) == {\n        **EMPTY_STATUS,\n        \"committed\": {\"added\": M.unordered(*files)},\n        \"git\": M.dict(),\n    }\n\n\ndef test_duplicate_hashes_not_in_remote(tmp_dir, dvc, scm, local_remote):\n    \"\"\"Test that files with identical content (same hash) are all correctly reported.\n\n    Regression test for https://github.com/iterative/dvc/issues/10959\n    \"\"\"\n    tmp_dir.dvc_gen(\n        {\"foo\": \"content\", \"bar\": \"content\", \"foobar\": \"foobar\"}, commit=\"add files\"\n    )\n    assert dvc.data_status(not_in_remote=True, granular=True, remote_refresh=True) == {\n        **EMPTY_STATUS,\n        \"unchanged\": M.unordered(\"foo\", \"bar\", \"foobar\"),\n        \"not_in_remote\": M.unordered(\"foo\", \"bar\", \"foobar\"),\n        \"git\": M.dict(),\n    }\n\n    dvc.push()\n    assert dvc.data_status(not_in_remote=True, granular=True, remote_refresh=True) == {\n        **EMPTY_STATUS,\n        \"unchanged\": M.unordered(\"foo\", \"bar\", \"foobar\"),\n        \"not_in_remote\": [],\n        \"git\": M.dict(),\n    }\n"
  },
  {
    "path": "tests/func/test_dataset.py",
    "content": "import os\nfrom datetime import datetime, timezone\nfrom typing import TYPE_CHECKING\n\nimport pytest\nfrom attrs import define, evolve, field, has\n\nfrom dvc.dependency.base import Dependency\nfrom dvc.exceptions import ReproductionError\nfrom dvc.repo.datasets import (\n    DatachainDataset,\n    DatachainDatasetLock,\n    DatasetNotFoundError,\n    DatasetSpec,\n    DVCDataset,\n    DVCDatasetLock,\n    DVCDatasetSpec,\n    FileInfo,\n    URLDataset,\n    URLDatasetLock,\n)\nfrom dvc_data.hashfile.meta import Meta\nfrom dvc_data.index import HashInfo, Tree\n\nif TYPE_CHECKING:\n    from dvc.repo import Repo\n\n\n@define\nclass MockedDatachainVersionInfo:\n    version: int\n    created_at: datetime = field(factory=lambda: datetime.now(timezone.utc))\n\n\ndef evolve_recursive(inst, **changes):\n    \"\"\"Recursive attr.evolve() method, where any attr-based attributes\n    will be evolved too.\n    \"\"\"\n    for key, value in changes.items():\n        v = getattr(inst, key)\n        if has(type(v)) and isinstance(value, dict):\n            value = evolve_recursive(v, **value)\n        changes[key] = value\n    return evolve(inst, **changes)\n\n\ndef test_dvc(tmp_dir, scm, dvc: \"Repo\"):\n    datasets = dvc.datasets\n\n    tmp_dir.scm_gen(\"file\", \"file\", commit=\"add file\")\n    dataset = datasets.add(\"mydataset\", tmp_dir.fs_path, \"dvc\", path=\"file\")\n    expected = DVCDataset(\n        manifest_path=(tmp_dir / \"dvc.yaml\").fs_path,\n        spec=DVCDatasetSpec(\n            name=\"mydataset\", url=tmp_dir.fs_path, type=\"dvc\", path=\"file\"\n        ),\n        lock=DVCDatasetLock(\n            name=\"mydataset\",\n            url=tmp_dir.fs_path,\n            type=\"dvc\",\n            path=\"file\",\n            rev_lock=scm.get_rev(),\n        ),\n    )\n    assert \"mydataset\" in datasets\n    assert dataset == datasets[\"mydataset\"] == expected\n    tmp_dir.scm_gen(\"file\", \"file\", commit=\"update file\")\n\n    old, new = 
datasets.update(\"mydataset\")\n    assert old == dataset\n    assert old != new\n    expected = evolve_recursive(expected, lock={\"rev_lock\": scm.get_rev()})\n    assert new == datasets[\"mydataset\"] == expected\n\n    # noop\n    old, new = datasets.update(\"mydataset\")\n    assert old == new\n\n\ndef test_datachain(tmp_dir, dvc, mocker):\n    datasets = dvc.datasets\n\n    version_info = [MockedDatachainVersionInfo(1), MockedDatachainVersionInfo(2)]\n    version_info.append(version_info[1])\n    mocker.patch(\"dvc.repo.datasets._get_dataset_info\", side_effect=version_info)\n\n    dataset = datasets.add(\"mydataset\", \"dataset\", \"dc\")\n    expected = DatachainDataset(\n        manifest_path=(tmp_dir / \"dvc.yaml\").fs_path,\n        spec=DatasetSpec(name=\"mydataset\", url=\"dataset\", type=\"dc\"),\n        lock=DatachainDatasetLock(\n            name=\"mydataset\",\n            url=\"dataset\",\n            type=\"dc\",\n            version=1,\n            created_at=version_info[0].created_at,\n        ),\n    )\n    assert \"mydataset\" in datasets\n    assert dataset == datasets[\"mydataset\"] == expected\n\n    old, new = datasets.update(\"mydataset\")\n    assert old == dataset\n    assert old != new\n    expected = evolve_recursive(\n        expected, lock={\"version\": 2, \"created_at\": version_info[1].created_at}\n    )\n    assert new == datasets[\"mydataset\"] == expected\n\n    # noop\n    old, new = datasets.update(\"mydataset\")\n    assert old == new\n\n\ndef test_url(tmp_dir, dvc, mocker):\n    datasets = dvc.datasets\n\n    tree = Tree()\n    tree.add((\"foo\",), Meta(version_id=\"1\"), None)\n    tree.digest(with_meta=True)\n    tree_meta = Meta(isdir=True)\n\n    def mocked_save(d):\n        d.meta, d.obj, d.hash_info = tree_meta, tree, HashInfo(\"md5\", \"value.dir\")\n\n    mocker.patch.object(Dependency, \"save\", mocked_save)\n\n    dataset = datasets.add(\"mydataset\", \"s3://dataset\", \"url\")\n    expected = URLDataset(\n      
  manifest_path=(tmp_dir / \"dvc.yaml\").fs_path,\n        spec=DatasetSpec(name=\"mydataset\", url=\"s3://dataset\", type=\"url\"),\n        lock=URLDatasetLock(\n            name=\"mydataset\",\n            url=\"s3://dataset\",\n            type=\"url\",\n            meta=Meta(isdir=True),\n            files=[FileInfo(relpath=\"foo\", meta=Meta(version_id=\"1\"))],\n        ),\n    )\n    assert \"mydataset\" in datasets\n    assert dataset == datasets[\"mydataset\"] == expected\n\n    tree.add((\"bar\",), Meta(version_id=\"2\"), None)\n    old, new = datasets.update(\"mydataset\")\n    assert old == dataset\n    assert old != new\n\n    assert expected.lock\n    new_files = [\n        *expected.lock.files,\n        FileInfo(relpath=\"bar\", meta=Meta(version_id=\"2\")),\n    ]\n\n    expected = evolve_recursive(expected, lock={\"files\": new_files})\n    assert new == datasets[\"mydataset\"] == expected\n\n    # noop\n    old, new = datasets.update(\"mydataset\")\n    assert old == new\n\n\ndef test_dvc_dump(tmp_dir, dvc):\n    manifest_path = os.path.join(tmp_dir, \"dvc.yaml\")\n    spec = DVCDatasetSpec(\n        name=\"mydataset\", url=tmp_dir.fs_path, type=\"dvc\", path=\"path\", rev=\"main\"\n    )\n    lock = DVCDatasetLock(rev_lock=\"0\" * 40, **spec.to_dict())\n    dataset = DVCDataset(manifest_path=manifest_path, spec=spec, lock=lock)\n\n    dvc.datasets.dump(dataset)\n\n    spec_d = {\n        \"name\": \"mydataset\",\n        \"type\": \"dvc\",\n        \"url\": tmp_dir.fs_path,\n        \"path\": \"path\",\n        \"rev\": \"main\",\n    }\n    assert (tmp_dir / \"dvc.yaml\").parse() == {\"datasets\": [spec_d]}\n    assert (tmp_dir / \"dvc.lock\").parse() == {\n        \"schema\": \"2.0\",\n        \"stages\": {},\n        \"datasets\": [{**spec_d, \"rev_lock\": \"0\" * 40}],\n    }\n\n    dvc._reset()\n    assert \"_datasets\" not in vars(dvc.datasets)\n    # test that we can read them back\n    assert dvc.datasets[\"mydataset\"] == 
dataset\n\n\ndef test_datachain_dump(tmp_dir, dvc):\n    manifest_path = os.path.join(tmp_dir, \"dvc.yaml\")\n    spec = DatasetSpec(name=\"mydataset\", url=\"dataset\", type=\"dc\")\n    dt = datetime.now(tz=timezone.utc)\n    lock = DatachainDatasetLock(version=1, created_at=dt, **spec.to_dict())\n    dataset = DatachainDataset(manifest_path=manifest_path, spec=spec, lock=lock)\n\n    dvc.datasets.dump(dataset)\n\n    spec_d = {\"name\": \"mydataset\", \"type\": \"dc\", \"url\": \"dataset\"}\n    assert (tmp_dir / \"dvc.yaml\").parse() == {\"datasets\": [spec_d]}\n    assert (tmp_dir / \"dvc.lock\").parse() == {\n        \"schema\": \"2.0\",\n        \"stages\": {},\n        \"datasets\": [{**spec_d, \"version\": 1, \"created_at\": dt.isoformat()}],\n    }\n\n    dvc._reset()\n    assert \"_datasets\" not in vars(dvc.datasets)\n    # test that we can read them back\n    assert dvc.datasets[\"mydataset\"] == dataset\n\n\ndef test_url_dump(tmp_dir, dvc):\n    manifest_path = os.path.join(tmp_dir, \"dvc.yaml\")\n    spec = DatasetSpec(name=\"mydataset\", url=\"s3://dataset\", type=\"url\")\n    files = [FileInfo(relpath=\"foo\", meta=Meta(version_id=\"1\"))]\n    lock = URLDatasetLock(meta=Meta(isdir=True), files=files, **spec.to_dict())\n    dataset = URLDataset(manifest_path=manifest_path, spec=spec, lock=lock)\n\n    dvc.datasets.dump(dataset)\n\n    spec_d = {\"name\": \"mydataset\", \"url\": \"s3://dataset\", \"type\": \"url\"}\n    assert (tmp_dir / \"dvc.yaml\").parse() == {\"datasets\": [spec_d]}\n    assert (tmp_dir / \"dvc.lock\").parse() == {\n        \"schema\": \"2.0\",\n        \"datasets\": [\n            {\n                **spec_d,\n                \"meta\": {\"isdir\": True},\n                \"files\": [{\"relpath\": \"foo\", \"meta\": {\"version_id\": \"1\"}}],\n            }\n        ],\n        \"stages\": {},\n    }\n\n    dvc._reset()\n    assert \"_datasets\" not in vars(dvc.datasets)\n    # test that we can read them back\n    assert 
dvc.datasets[\"mydataset\"] == dataset\n\n\ndef test_invalidation(tmp_dir, dvc):\n    manifest_path = os.path.join(tmp_dir, \"dvc.yaml\")\n    spec = DatasetSpec(name=\"mydataset\", url=\"url1\", type=\"url\")\n    lock = DatachainDatasetLock(\n        name=\"mydataset\",\n        url=\"dataset\",\n        type=\"dc\",\n        version=1,\n        created_at=datetime.now(tz=timezone.utc),\n    )\n    dvc.datasets._dump_spec(manifest_path, spec)\n    dvc.datasets._dump_lock(manifest_path, lock)\n\n    assert dvc.datasets[\"mydataset\"] == URLDataset(\n        manifest_path=manifest_path,\n        spec=spec,\n        lock=None,  # lock should be discarded\n    )\n\n\ndef test_dvc_dataset_pipeline(tmp_dir, dvc, scm):\n    dvc.datasets.add(\"mydataset\", tmp_dir.fs_path, \"dvc\")\n\n    stage = dvc.stage.add(cmd=\"echo\", name=\"train\", deps=[\"ds://mydataset\"])\n    assert (tmp_dir / \"dvc.yaml\").parse() == {\n        \"datasets\": [{\"name\": \"mydataset\", \"url\": tmp_dir.fs_path, \"type\": \"dvc\"}],\n        \"stages\": {\"train\": {\"cmd\": \"echo\", \"deps\": [\"ds://mydataset\"]}},\n    }\n\n    assert dvc.status() == {\"train\": [{\"changed deps\": {\"ds://mydataset\": \"new\"}}]}\n    assert dvc.reproduce() == [stage]\n\n    d = (tmp_dir / \"dvc.lock\").parse()\n    assert d[\"stages\"][\"train\"][\"deps\"][0] == {\n        \"path\": \"ds://mydataset\",\n        \"dataset\": d[\"datasets\"][0],\n    }\n\n    assert dvc.status() == {}\n    assert dvc.reproduce() == []\n\n    tmp_dir.scm_gen(\"foo\", \"foo\", commit=\"add foo\")\n    dvc.datasets.update(\"mydataset\")\n\n    assert dvc.status() == {\"train\": [{\"changed deps\": {\"ds://mydataset\": \"modified\"}}]}\n    assert dvc.reproduce() == [stage]\n\n\ndef test_datachain_dataset_pipeline(mocker, tmp_dir, dvc):\n    version_info = [MockedDatachainVersionInfo(1), MockedDatachainVersionInfo(2)]\n    mocker.patch(\"dvc.repo.datasets._get_dataset_info\", side_effect=version_info)\n\n    
dvc.datasets.add(\"mydataset\", \"dataset\", \"dc\")\n\n    stage = dvc.stage.add(cmd=\"echo\", name=\"train\", deps=[\"ds://mydataset\"])\n    assert (tmp_dir / \"dvc.yaml\").parse() == {\n        \"datasets\": [{\"name\": \"mydataset\", \"url\": \"dataset\", \"type\": \"dc\"}],\n        \"stages\": {\"train\": {\"cmd\": \"echo\", \"deps\": [\"ds://mydataset\"]}},\n    }\n\n    assert dvc.status() == {\"train\": [{\"changed deps\": {\"ds://mydataset\": \"new\"}}]}\n    assert dvc.reproduce() == [stage]\n\n    d = (tmp_dir / \"dvc.lock\").parse()\n    assert d[\"stages\"][\"train\"][\"deps\"][0] == {\n        \"path\": \"ds://mydataset\",\n        \"dataset\": d[\"datasets\"][0],\n    }\n\n    assert dvc.status() == {}\n    assert dvc.reproduce() == []\n\n    dvc.datasets.update(\"mydataset\")\n\n    assert dvc.status() == {\"train\": [{\"changed deps\": {\"ds://mydataset\": \"modified\"}}]}\n    assert dvc.reproduce() == [stage]\n\n\ndef test_url_dataset_pipeline(mocker, tmp_dir, dvc):\n    tree = Tree()\n    tree.add((\"foo\",), Meta(version_id=\"1\"), None)\n    tree.digest(with_meta=True)\n    tree_meta = Meta(isdir=True)\n\n    def mocked_save(d):\n        d.meta, d.obj, d.hash_info = tree_meta, tree, HashInfo(\"md5\", \"value.dir\")\n\n    mocker.patch.object(Dependency, \"save\", mocked_save)\n\n    dvc.datasets.add(\"mydataset\", \"s3://mydataset\", \"url\")\n\n    stage = dvc.stage.add(cmd=\"echo\", name=\"train\", deps=[\"ds://mydataset\"])\n    assert (tmp_dir / \"dvc.yaml\").parse() == {\n        \"datasets\": [{\"name\": \"mydataset\", \"url\": \"s3://mydataset\", \"type\": \"url\"}],\n        \"stages\": {\"train\": {\"cmd\": \"echo\", \"deps\": [\"ds://mydataset\"]}},\n    }\n\n    assert dvc.status() == {\"train\": [{\"changed deps\": {\"ds://mydataset\": \"new\"}}]}\n    assert dvc.reproduce() == [stage]\n\n    d = (tmp_dir / \"dvc.lock\").parse()\n    assert d[\"stages\"][\"train\"][\"deps\"][0] == {\n        \"path\": \"ds://mydataset\",\n        
\"dataset\": d[\"datasets\"][0],\n    }\n\n    assert dvc.status() == {}\n    assert dvc.reproduce() == []\n\n    tree.add((\"bar\",), Meta(version_id=\"2\"), None)\n    dvc.datasets.update(\"mydataset\")\n\n    assert dvc.status() == {\"train\": [{\"changed deps\": {\"ds://mydataset\": \"modified\"}}]}\n    assert dvc.reproduce() == [stage]\n\n\ndef test_pipeline_when_not_in_sync(tmp_dir, dvc):\n    manifest_path = os.path.join(tmp_dir, \"dvc.yaml\")\n    spec = DatasetSpec(name=\"mydataset\", url=\"url1\", type=\"url\")\n    lock = DatachainDatasetLock(\n        name=\"mydataset\",\n        url=\"dataset\",\n        type=\"dc\",\n        version=1,\n        created_at=datetime.now(tz=timezone.utc),\n    )\n    dvc.datasets._dump_spec(manifest_path, spec)\n    dvc.datasets._dump_lock(manifest_path, lock)\n\n    dvc.stage.add(name=\"train\", cmd=\"echo\", deps=[\"ds://mydataset\"])\n    assert dvc.status() == {\n        \"train\": [{\"changed deps\": {\"ds://mydataset\": \"not in sync\"}}]\n    }\n    with pytest.raises(ReproductionError) as exc:\n        dvc.reproduce()\n    assert \"not in sync\" in str(exc.value.__cause__)\n\n\ndef test_collect(tmp_dir, dvc):\n    manifest_path1 = os.path.join(tmp_dir, \"dvc.yaml\")\n    dt = datetime.now(tz=timezone.utc)\n    spec = DatasetSpec(name=\"mydataset1\", url=\"url1\", type=\"dc\")\n    lock = DatachainDatasetLock(version=1, created_at=dt, **spec.to_dict())\n    mydataset1 = DatachainDataset(manifest_path=manifest_path1, spec=spec, lock=lock)\n    dvc.datasets.dump(mydataset1)\n\n    (tmp_dir / \"sub\").mkdir()\n    manifest_path2 = os.path.join(tmp_dir, \"sub\", \"dvc.yaml\")\n    spec = DVCDatasetSpec(\n        name=\"mydataset2\", url=tmp_dir.fs_path, type=\"dvc\", path=\"path\"\n    )\n    lock = DVCDatasetLock(rev_lock=\"0\" * 40, **spec.to_dict())\n    mydataset2 = DVCDataset(manifest_path=manifest_path2, spec=spec, lock=lock)\n    dvc.datasets.dump(mydataset2)\n\n    dvc._reset()\n    assert \"_datasets\" not 
in vars(dvc.datasets)\n\n    assert len(dvc.datasets) == 2\n    assert \"mydataset1\" in dvc.datasets\n    assert \"mydataset2\" in dvc.datasets\n    assert list(iter(dvc.datasets)) == [\"mydataset1\", \"mydataset2\"]\n    assert dvc.datasets[\"mydataset1\"] == mydataset1\n    assert dvc.datasets[\"mydataset2\"] == mydataset2\n    assert dict(dvc.datasets.items()) == {\n        \"mydataset1\": mydataset1,\n        \"mydataset2\": mydataset2,\n    }\n\n    with pytest.raises(DatasetNotFoundError, match=r\"^dataset not found$\"):\n        dvc.datasets[\"not-existing\"]\n\n\ndef test_parametrized(tmp_dir, dvc):\n    (tmp_dir / \"dvc.yaml\").dump(\n        {\n            \"datasets\": [\n                {\"name\": \"${ds1.name}\", \"url\": \"${ds1.url}\", \"type\": \"dc\"},\n                {\n                    \"name\": \"${ds2.name}\",\n                    \"url\": \"${ds2.url}\",\n                    \"type\": \"dvc\",\n                    \"path\": \"${ds2.path}\",\n                },\n                {\n                    \"name\": \"${ds3.name}\",\n                    \"url\": \"${ds3.url}\",\n                    \"type\": \"url\",\n                },\n            ]\n        }\n    )\n    (tmp_dir / \"params.yaml\").dump(\n        {\n            \"ds1\": {\"name\": \"dogs\", \"url\": \"dc://dogs\"},\n            \"ds2\": {\n                \"name\": \"example-get-started\",\n                \"url\": \"git@github.com:iterative/example-get-started.git\",\n                \"path\": \"path\",\n            },\n            \"ds3\": {\n                \"name\": \"cloud-versioning-demo\",\n                \"url\": \"s3://cloud-versioning-demo\",\n            },\n        }\n    )\n\n    path = (tmp_dir / \"dvc.yaml\").fs_path\n    assert dict(dvc.datasets.items()) == {\n        \"dogs\": DatachainDataset(\n            manifest_path=path,\n            spec=DatasetSpec(name=\"dogs\", url=\"dc://dogs\", type=\"dc\"),\n        ),\n        \"example-get-started\": 
DVCDataset(\n            manifest_path=path,\n            spec=DVCDatasetSpec(\n                name=\"example-get-started\",\n                url=\"git@github.com:iterative/example-get-started.git\",\n                path=\"path\",\n                type=\"dvc\",\n            ),\n        ),\n        \"cloud-versioning-demo\": URLDataset(\n            manifest_path=path,\n            spec=DatasetSpec(\n                name=\"cloud-versioning-demo\",\n                url=\"s3://cloud-versioning-demo\",\n                type=\"url\",\n            ),\n        ),\n    }\n"
  },
  {
    "path": "tests/func/test_diff.py",
    "content": "import hashlib\nimport os\n\nimport pytest\n\nfrom dvc.exceptions import DvcException\nfrom dvc.utils.fs import remove\n\n\ndef digest(text):\n    return hashlib.md5(bytes(text, \"utf-8\"), usedforsecurity=False).hexdigest()\n\n\ndef test_no_scm(tmp_dir, dvc):\n    from dvc.scm import NoSCMError\n\n    tmp_dir.dvc_gen(\"file\", \"text\")\n\n    with pytest.raises(NoSCMError):\n        dvc.diff()\n\n\ndef test_same_rev(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"file\", \"text\", commit=\"add file\")\n\n    assert not dvc.diff(\"HEAD\", \"HEAD\")\n    assert not dvc.diff(\"HEAD\", \"master\")\n    assert not dvc.diff(\"master\", \"HEAD\")\n\n\ndef test_added(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"file\", \"text\")\n\n    assert dvc.diff() == {\n        \"added\": [{\"path\": \"file\", \"hash\": digest(\"text\")}],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_added_deep(tmp_dir, scm, dvc):\n    tmp_dir.gen({\"datas\": {\"data\": {\"file\": \"text\"}}})\n    dvc.add(os.path.join(\"datas\", \"data\"))\n\n    assert dvc.diff() == {\n        \"added\": [\n            {\n                \"path\": os.path.join(\"datas\", \"data\" + os.sep),\n                \"hash\": \"0dab3fae569586d4c33272e5011605bf.dir\",\n            },\n            {\n                \"path\": os.path.join(\"datas\", \"data\", \"file\"),\n                \"hash\": \"1cb251ec0d568de6a929b520c4aed8d1\",\n            },\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_no_cache_entry(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"file\", \"first\", commit=\"add a file\")\n\n    tmp_dir.dvc_gen({\"dir\": {\"1\": \"1\", \"2\": \"2\"}})\n    tmp_dir.dvc_gen(\"file\", \"second\")\n\n    remove(tmp_dir / \".dvc\" / \"cache\")\n\n    dir_checksum = \"5fb6b29836c388e093ca0715c872fe2a.dir\"\n\n    assert dvc.diff() == {\n 
       \"added\": [\n            {\"path\": os.path.join(\"dir\", \"\"), \"hash\": dir_checksum},\n            {\"path\": os.path.join(\"dir\", \"1\"), \"hash\": digest(\"1\")},\n            {\"path\": os.path.join(\"dir\", \"2\"), \"hash\": digest(\"2\")},\n        ],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"path\": \"file\",\n                \"hash\": {\"old\": digest(\"first\"), \"new\": digest(\"second\")},\n            }\n        ],\n        \"not in cache\": [\n            {\n                \"path\": \"file\",\n                \"hash\": digest(\"first\"),\n            }\n        ],\n        \"renamed\": [],\n    }\n\n\ndef test_diff_no_cache(tmp_dir, scm, dvc):\n    (stage,) = tmp_dir.dvc_gen({\"dir\": {\"file\": \"file content\"}}, commit=\"first\")\n    scm.tag(\"v1\")\n    dvc.cache.local.clear()\n    old_digest = stage.outs[0].hash_info.value\n    dir_path = os.path.join(\"dir\", \"\")\n\n    default_result = {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [],\n        \"not in cache\": [],\n    }\n\n    assert dvc.diff(\"v1\") == default_result | {\n        \"not in cache\": [{\"path\": dir_path, \"hash\": old_digest}],\n    }\n    assert dvc.diff(\"HEAD\", \"v1\") == {}\n    assert dvc.diff(\"v1\", \"HEAD\") == {}\n\n    (stage,) = tmp_dir.dvc_gen(\n        {\"dir\": {\"file\": \"modified file content\"}}, commit=\"first\"\n    )\n    scm.tag(\"v2\")\n    new_digest = stage.outs[0].hash_info.value\n\n    assert dvc.diff(\"v2\") == {}\n    assert dvc.diff(\"v1\") == default_result | {\n        \"modified\": [\n            {\"path\": dir_path, \"hash\": {\"old\": old_digest, \"new\": new_digest}}\n        ],\n        \"not in cache\": [{\"path\": dir_path, \"hash\": old_digest}],\n    }\n    assert dvc.diff(\"v1\", \"v2\") == default_result | {\n        \"modified\": [\n            {\"path\": dir_path, \"hash\": {\"old\": old_digest, \"new\": new_digest}}\n   
     ],\n    }\n\n    remove(dvc.cache.local.path)\n    # drop the cache so that we can test as if we don't know what entries are\n    # in the missing cache entry.\n    dvc.drop_data_index()\n\n    assert dvc.diff(\"v2\") == default_result | {\n        \"not in cache\": [{\"path\": dir_path, \"hash\": new_digest}],\n    }\n    assert dvc.diff(\"v1\") == default_result | {\n        \"modified\": [\n            {\"path\": dir_path, \"hash\": {\"old\": old_digest, \"new\": new_digest}}\n        ],\n        \"not in cache\": [{\"path\": dir_path, \"hash\": old_digest}],\n    }\n    assert dvc.diff(\"v2\", \"v1\") == default_result | {\n        \"modified\": [\n            {\"path\": dir_path, \"hash\": {\"old\": new_digest, \"new\": old_digest}}\n        ],\n    }\n    assert dvc.diff(\"v1\", \"v2\") == default_result | {\n        \"modified\": [\n            {\"path\": dir_path, \"hash\": {\"old\": old_digest, \"new\": new_digest}}\n        ],\n    }\n    assert dvc.diff() == default_result | {\n        \"not in cache\": [{\"path\": dir_path, \"hash\": new_digest}],\n    }\n\n    remove(str(tmp_dir / \"dir\"))\n    assert dvc.diff() == default_result | {\n        \"deleted\": [{\"path\": dir_path, \"hash\": new_digest}],\n        \"not in cache\": [{\"path\": dir_path, \"hash\": new_digest}],\n    }\n\n\n@pytest.mark.parametrize(\"delete_data\", [True, False])\ndef test_deleted(tmp_dir, scm, dvc, delete_data):\n    tmp_dir.dvc_gen(\"file\", \"text\", commit=\"add file\")\n    (tmp_dir / \"file.dvc\").unlink()\n    if delete_data:\n        (tmp_dir / \"file\").unlink()\n\n    assert dvc.diff() == {\n        \"added\": [],\n        \"deleted\": [{\"path\": \"file\", \"hash\": digest(\"text\")}],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_modified(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"file\", \"first\", commit=\"first version\")\n    tmp_dir.dvc_gen(\"file\", \"second\")\n\n    assert dvc.diff() == {\n   
     \"added\": [],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"path\": \"file\",\n                \"hash\": {\"old\": digest(\"first\"), \"new\": digest(\"second\")},\n            }\n        ],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_modified_subrepo(tmp_dir, scm, dvc):\n    from dvc.repo import Repo\n\n    tmp_dir.gen({\"subdir\": {\"file\": \"first\"}})\n    subrepo_dir = tmp_dir / \"subdir\"\n\n    with subrepo_dir.chdir():\n        subrepo = Repo.init(subdir=True)\n        subrepo.add(\"file\")\n\n    scm.add(os.path.join(\"subdir\", \"file.dvc\"))\n    scm.commit(\"init\")\n\n    (subrepo_dir / \"file\").write_text(\"second\")\n\n    with subrepo_dir.chdir():\n        subrepo = Repo()\n        assert subrepo.diff() == {\n            \"added\": [],\n            \"deleted\": [],\n            \"modified\": [\n                {\n                    \"path\": \"file\",\n                    \"hash\": {\"old\": digest(\"first\"), \"new\": digest(\"second\")},\n                }\n            ],\n            \"not in cache\": [],\n            \"renamed\": [],\n        }\n\n\ndef test_refs(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"file\", \"first\", commit=\"first version\")\n    tmp_dir.dvc_gen(\"file\", \"second\", commit=\"second version\")\n    tmp_dir.dvc_gen(\"file\", \"third\", commit=\"third version\")\n\n    HEAD_2 = digest(\"first\")  # noqa: N806\n    HEAD_1 = digest(\"second\")  # noqa: N806\n    HEAD = digest(\"third\")  # noqa: N806\n\n    assert dvc.diff(\"HEAD~1\") == {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [{\"path\": \"file\", \"hash\": {\"old\": HEAD_1, \"new\": HEAD}}],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n    assert dvc.diff(\"HEAD~2\", \"HEAD~1\") == {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [{\"path\": \"file\", \"hash\": {\"old\": HEAD_2, \"new\": HEAD_1}}],\n        
\"not in cache\": [],\n        \"renamed\": [],\n    }\n\n    with pytest.raises(DvcException, match=r\"unknown Git revision 'missing'\"):\n        dvc.diff(\"missing\")\n\n\ndef test_directories(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"dir\": {\"1\": \"1\", \"2\": \"2\"}}, commit=\"add a directory\")\n    tmp_dir.dvc_gen({\"dir\": {\"3\": \"3\"}}, commit=\"add a file\")\n    tmp_dir.dvc_gen({\"dir\": {\"2\": \"two\"}}, commit=\"modify a file\")\n\n    (tmp_dir / \"dir\" / \"2\").unlink()\n    assert dvc.status() != {}  # sanity check\n    dvc.add(\"dir\")\n    scm.add([\"dir.dvc\"])\n    scm.commit(\"delete a file\")\n\n    # The \":/<text>\" format is a way to specify revisions by commit message:\n    #       https://git-scm.com/docs/revisions\n    #\n    assert dvc.diff(\":/init\", \":/directory\") == {\n        \"added\": [\n            {\n                \"path\": os.path.join(\"dir\", \"\"),\n                \"hash\": \"5fb6b29836c388e093ca0715c872fe2a.dir\",\n            },\n            {\"path\": os.path.join(\"dir\", \"1\"), \"hash\": digest(\"1\")},\n            {\"path\": os.path.join(\"dir\", \"2\"), \"hash\": digest(\"2\")},\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n    assert dvc.diff(\":/directory\", \":/modify\") == {\n        \"added\": [{\"path\": os.path.join(\"dir\", \"3\"), \"hash\": digest(\"3\")}],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"path\": os.path.join(\"dir\", \"\"),\n                \"hash\": {\n                    \"old\": \"5fb6b29836c388e093ca0715c872fe2a.dir\",\n                    \"new\": \"9b5faf37366b3370fd98e3e60ca439c1.dir\",\n                },\n            },\n            {\n                \"path\": os.path.join(\"dir\", \"2\"),\n                \"hash\": {\"old\": digest(\"2\"), \"new\": digest(\"two\")},\n            },\n        ],\n        \"not in cache\": [],\n        \"renamed\": 
[],\n    }\n\n    assert dvc.diff(\":/modify\", \":/delete\") == {\n        \"added\": [],\n        \"deleted\": [{\"path\": os.path.join(\"dir\", \"2\"), \"hash\": digest(\"two\")}],\n        \"modified\": [\n            {\n                \"path\": os.path.join(\"dir\", \"\"),\n                \"hash\": {\n                    \"old\": \"9b5faf37366b3370fd98e3e60ca439c1.dir\",\n                    \"new\": \"83ae82fb367ac9926455870773ff09e6.dir\",\n                },\n            }\n        ],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_diff_dirty(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\n        {\"file\": \"file_content\", \"dir\": {\"dir_file1\": \"dir file content\"}},\n        commit=\"initial\",\n    )\n\n    (tmp_dir / \"file\").unlink()\n    tmp_dir.gen({\"dir\": {\"dir_file2\": \"dir file 2 content\"}})\n    tmp_dir.dvc_gen(\"new_file\", \"new_file_content\")\n\n    result = dvc.diff()\n\n    assert result == {\n        \"added\": [\n            {\n                \"hash\": digest(\"dir file 2 content\"),\n                \"path\": os.path.join(\"dir\", \"dir_file2\"),\n            },\n            {\"hash\": \"86d049de17c76ac44cdcac146042ec9b\", \"path\": \"new_file\"},\n        ],\n        \"deleted\": [{\"hash\": \"7f0b6bb0b7e951b7fd2b2a4a326297e1\", \"path\": \"file\"}],\n        \"modified\": [\n            {\n                \"hash\": {\n                    \"new\": \"38175ad60f0e58ac94e0e2b7688afd81.dir\",\n                    \"old\": \"92daf39af116ca2fb245acaeb2ae65f7.dir\",\n                },\n                \"path\": os.path.join(\"dir\", \"\"),\n            }\n        ],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_no_changes(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"file\", \"first\", commit=\"add a file\")\n    assert dvc.diff() == {}\n\n\ndef test_no_commits(tmp_dir):\n    from dvc.repo import Repo\n    from dvc.scm import Git\n\n    git = Git.init(tmp_dir.fs_path)\n    
assert git.no_commits\n\n    assert Repo.init().diff() == {}\n\n\ndef test_abs_target(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"file\", \"text\")\n\n    assert dvc.diff(targets=(tmp_dir / \"file\").fs_path) == {\n        \"added\": [{\"path\": \"file\", \"hash\": digest(\"text\")}],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef setup_targets_test(tmp_dir):\n    tmp_dir.dvc_gen(\"file\", \"first\", commit=\"add a file\")\n\n    tmp_dir.dvc_gen({\"dir\": {\"1\": \"1\", \"2\": \"2\"}})\n    tmp_dir.dvc_gen(\"file\", \"second\")\n\n    tmp_dir.dvc_gen(os.path.join(\"dir_with\", \"file.txt\"), \"first\")\n\n\ndef test_targets_missing_path(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    with pytest.raises(FileNotFoundError):\n        dvc.diff(targets=[\"missing\"])\n\n\ndef test_targets_single_file(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    assert dvc.diff(targets=[\"file\"]) == {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"path\": \"file\",\n                \"hash\": {\"old\": digest(\"first\"), \"new\": digest(\"second\")},\n            }\n        ],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_targets_single_dir(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    dir_checksum = \"5fb6b29836c388e093ca0715c872fe2a.dir\"\n\n    expected_result = {\n        \"added\": [\n            {\"path\": os.path.join(\"dir\", \"\"), \"hash\": dir_checksum},\n            {\"path\": os.path.join(\"dir\", \"1\"), \"hash\": digest(\"1\")},\n            {\"path\": os.path.join(\"dir\", \"2\"), \"hash\": digest(\"2\")},\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n    assert dvc.diff(targets=[\"dir\"]) == expected_result\n    assert dvc.diff(targets=[\"dir\" + os.path.sep]) == 
expected_result\n\n\ndef test_targets_single_file_in_dir(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    assert dvc.diff(targets=[os.path.join(\"dir\", \"1\")]) == {\n        \"added\": [{\"path\": os.path.join(\"dir\", \"1\"), \"hash\": digest(\"1\")}],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_targets_two_files_in_dir(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    assert dvc.diff(targets=[os.path.join(\"dir\", \"1\"), os.path.join(\"dir\", \"2\")]) == {\n        \"added\": [\n            {\"path\": os.path.join(\"dir\", \"1\"), \"hash\": digest(\"1\")},\n            {\"path\": os.path.join(\"dir\", \"2\"), \"hash\": digest(\"2\")},\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_targets_file_and_dir(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    dir_checksum = \"5fb6b29836c388e093ca0715c872fe2a.dir\"\n\n    assert dvc.diff(targets=[\"file\", \"dir\"]) == {\n        \"added\": [\n            {\"path\": os.path.join(\"dir\", \"\"), \"hash\": dir_checksum},\n            {\"path\": os.path.join(\"dir\", \"1\"), \"hash\": digest(\"1\")},\n            {\"path\": os.path.join(\"dir\", \"2\"), \"hash\": digest(\"2\")},\n        ],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"path\": \"file\",\n                \"hash\": {\"old\": digest(\"first\"), \"new\": digest(\"second\")},\n            }\n        ],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\ndef test_targets_single_dir_with_file(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    expected_result = {\n        \"added\": [\n            {\n                \"path\": os.path.join(\"dir_with\", \"file.txt\"),\n                \"hash\": digest(\"first\"),\n            }\n        ],\n        \"deleted\": [],\n        \"modified\": 
[],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n    assert dvc.diff(targets=[\"dir_with\"], recursive=True) == expected_result\n    assert (\n        dvc.diff(targets=[\"dir_with\" + os.path.sep], recursive=True) == expected_result\n    )\n\n\ndef test_targets_single_file_in_dir_with_file(tmp_dir, scm, dvc):\n    setup_targets_test(tmp_dir)\n\n    assert dvc.diff(targets=[os.path.join(\"dir_with\", \"file.txt\")]) == {\n        \"added\": [\n            {\n                \"path\": os.path.join(\"dir_with\", \"file.txt\"),\n                \"hash\": digest(\"first\"),\n            }\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n\n\n@pytest.mark.parametrize(\"commit_last\", [True, False])\ndef test_diff_add_similar_files(tmp_dir, scm, dvc, commit_last):\n    if commit_last:\n        last_commit_msg = \"commit #2\"\n        a_rev = \"HEAD~1\"\n    else:\n        last_commit_msg = None\n        a_rev = \"HEAD\"\n\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"file\": \"text1\", \"subdir\": {\"file2\": \"text2\"}}},\n        commit=\"commit #1\",\n    )\n    tmp_dir.dvc_gen(\n        {\"dir2\": {\"file\": \"text1\", \"subdir\": {\"file2\": \"text2\"}}},\n        commit=last_commit_msg,\n    )\n\n    assert dvc.diff(a_rev) == {\n        \"added\": [\n            {\n                \"path\": os.path.join(\"dir2\", \"\"),\n                \"hash\": \"cb58ee07cb01044db229e4d6121a0dfc.dir\",\n            },\n            {\n                \"path\": os.path.join(\"dir2\", \"file\"),\n                \"hash\": \"cef7ccd89dacf1ced6f5ec91d759953f\",\n            },\n            {\n                \"path\": os.path.join(\"dir2\", \"subdir\", \"file2\"),\n                \"hash\": \"fe6123a759017e4a2af4a2d19961ed71\",\n            },\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [],\n        \"not in cache\": [],\n    
}\n\n\n@pytest.mark.parametrize(\"commit_last\", [True, False])\ndef test_diff_rename_folder(tmp_dir, scm, dvc, commit_last):\n    if commit_last:\n        last_commit_msg = \"commit #2\"\n        a_rev = \"HEAD~1\"\n    else:\n        last_commit_msg = None\n        a_rev = \"HEAD\"\n\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"file\": \"text1\", \"subdir\": {\"file2\": \"text2\"}}},\n        commit=\"commit #1\",\n    )\n    (tmp_dir / \"dir\").replace(tmp_dir / \"dir2\")\n    tmp_dir.dvc_add(\"dir2\", commit=last_commit_msg)\n    assert dvc.diff(a_rev) == {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [\n            {\n                \"path\": {\n                    \"old\": os.path.join(\"dir\", \"\"),\n                    \"new\": os.path.join(\"dir2\", \"\"),\n                },\n                \"hash\": \"cb58ee07cb01044db229e4d6121a0dfc.dir\",\n            },\n            {\n                \"path\": {\n                    \"old\": os.path.join(\"dir\", \"file\"),\n                    \"new\": os.path.join(\"dir2\", \"file\"),\n                },\n                \"hash\": \"cef7ccd89dacf1ced6f5ec91d759953f\",\n            },\n            {\n                \"path\": {\n                    \"old\": os.path.join(\"dir\", \"subdir\", \"file2\"),\n                    \"new\": os.path.join(\"dir2\", \"subdir\", \"file2\"),\n                },\n                \"hash\": \"fe6123a759017e4a2af4a2d19961ed71\",\n            },\n        ],\n        \"not in cache\": [],\n    }\n\n\n@pytest.mark.parametrize(\"commit_last\", [True, False])\ndef test_diff_rename_file(tmp_dir, scm, dvc, commit_last):\n    if commit_last:\n        last_commit_msg = \"commit #2\"\n        a_rev = \"HEAD~1\"\n    else:\n        last_commit_msg = None\n        a_rev = \"HEAD\"\n\n    paths = tmp_dir.gen({\"dir\": {\"file\": \"text1\", \"subdir\": {\"file2\": \"text2\"}}})\n    tmp_dir.dvc_add(paths, commit=\"commit #1\")\n    (tmp_dir / 
\"dir\" / \"file\").replace(tmp_dir / \"dir\" / \"subdir\" / \"file3\")\n\n    tmp_dir.dvc_add(paths, commit=last_commit_msg)\n    assert dvc.diff(a_rev) == {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"path\": os.path.join(\"dir\", \"\"),\n                \"hash\": {\n                    \"old\": \"cb58ee07cb01044db229e4d6121a0dfc.dir\",\n                    \"new\": \"a4ac9c339aacc60b6a3152e362c319c8.dir\",\n                },\n            }\n        ],\n        \"renamed\": [\n            {\n                \"path\": {\n                    \"old\": os.path.join(\"dir\", \"file\"),\n                    \"new\": os.path.join(\"dir\", \"subdir\", \"file3\"),\n                },\n                \"hash\": \"cef7ccd89dacf1ced6f5ec91d759953f\",\n            }\n        ],\n        \"not in cache\": [],\n    }\n\n\ndef test_rename_multiple_files_same_hashes(tmp_dir, scm, dvc):\n    \"\"\"Test diff by renaming >=2 instances of file with same hashes.\n\n    DVC should be able to detect that they are renames, and should not include\n    them in either of the `added` or the `deleted` section.\n    \"\"\"\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"foo\": \"foo\", \"subdir\": {\"foo\": \"foo\"}}}, commit=\"commit #1\"\n    )\n    remove(tmp_dir / \"dir\")\n    # changing foo and subdir/foo to bar and subdir/bar respectively\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"bar\": \"foo\", \"subdir\": {\"bar\": \"foo\"}}}, commit=\"commit #2\"\n    )\n    assert dvc.diff(\"HEAD~\") == {\n        \"added\": [],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"hash\": {\n                    \"new\": \"31b36b3ea5f4485e27f10578c47183b0.dir\",\n                    \"old\": \"c7684c8b3b0d28cf80d5305e2d856bfc.dir\",\n                },\n                \"path\": os.path.join(\"dir\", \"\"),\n            }\n        ],\n        \"not in cache\": [],\n        \"renamed\": [\n            {\n   
             \"hash\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"path\": {\n                    \"new\": os.path.join(\"dir\", \"bar\"),\n                    \"old\": os.path.join(\"dir\", \"foo\"),\n                },\n            },\n            {\n                \"hash\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"path\": {\n                    \"new\": os.path.join(\"dir\", \"subdir\", \"bar\"),\n                    \"old\": os.path.join(\"dir\", \"subdir\", \"foo\"),\n                },\n            },\n        ],\n    }\n\n\ndef test_diff_granular(tmp_dir, dvc, scm):\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"data\": {\n                    \"subdir\": {\"subfoo\": \"subfoo\", \"subbar\": \"subbar\"},\n                    \"foo\": \"foo\",\n                    \"bar\": \"bar\",\n                },\n            },\n        }\n    )\n\n    dvc.add(os.path.join(\"dir\", \"data\"))\n    scm.add(os.path.join(\"dir\", \"data.dvc\"))\n    scm.add(os.path.join(\"dir\", \".gitignore\"))\n    scm.commit(\"data\")\n\n    assert dvc.diff() == {}\n\n    (tmp_dir / \"dir\" / \"data\" / \"subdir\" / \"new\").write_text(\"new\")\n\n    assert dvc.diff() == {\n        \"added\": [\n            {\n                \"hash\": \"22af645d1859cb5ca6da0c484f1f37ea\",\n                \"path\": os.path.join(\"dir\", \"data\", \"subdir\", \"new\"),\n            }\n        ],\n        \"deleted\": [],\n        \"modified\": [\n            {\n                \"hash\": {\n                    \"new\": \"efa5b20d5f935dcc5555b26db6e19b76.dir\",\n                    \"old\": \"1aca2c799df82929bbdd976557975546.dir\",\n                },\n                \"path\": os.path.join(\"dir\", \"data\", \"\"),\n            }\n        ],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n    assert dvc.diff(targets=[os.path.join(\"dir\", \"data\", \"subdir\")]) == {\n        \"added\": [\n            {\n                \"hash\": 
\"22af645d1859cb5ca6da0c484f1f37ea\",\n                \"path\": os.path.join(\"dir\", \"data\", \"subdir\", \"new\"),\n            }\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n        \"renamed\": [],\n    }\n"
  },
  {
    "path": "tests/func/test_download.py",
    "content": "import os\nfrom unittest.mock import ANY\n\nimport pytest\n\nfrom dvc.fs import download\n\nlfs_prefetch_params = [\n    pytest.param(\"abc\", \"abc\", id=\"plain\"),\n    pytest.param(\n        \"*\",\n        \"[*]\",\n        marks=pytest.mark.skipif(\n            os.name == \"nt\",\n            reason=\"forbidden character `*` on Windows filesystem\",\n        ),\n        id=\"escape-*\",\n    ),\n    pytest.param(\n        \"**\",\n        \"[*][*]\",\n        marks=pytest.mark.skipif(\n            os.name == \"nt\", reason=\"forbidden character `*` on Windows filesystem\"\n        ),\n        id=\"escape-**\",\n    ),\n    pytest.param(\n        \"?\",\n        \"[?]\",\n        marks=pytest.mark.skipif(\n            os.name == \"nt\", reason=\"forbidden character `?` on Windows filesystem\"\n        ),\n        id=\"escape-?\",\n    ),\n    pytest.param(\"[abc]\", \"[[]abc]\", id=\"escape-[seq]\"),\n    pytest.param(\"[!abc]\", \"[[]!abc]\", id=\"escape-[!seq]\"),\n]\n\n\n@pytest.mark.parametrize(\"dirname, include_name\", lfs_prefetch_params)\ndef test_lfs_prefetch_directory(tmp_dir, dvc, scm, mocker, dirname, include_name):\n    mock_fetch = mocker.patch(\"scmrepo.git.lfs.fetch\")\n    tmp_dir.scm_gen(\n        {\n            \".gitattributes\": \"data/**/* filter=lfs diff=lfs merge=lfs -text\",\n            f\"data/{dirname}/test.txt\": \"test data\",\n        },\n        commit=\"init lfs\",\n    )\n    rev = scm.get_rev()\n    with dvc.switch(rev):\n        download(dvc.dvcfs, f\"data/{dirname}\", \"data\")\n        mock_fetch.assert_called_once_with(\n            scm, [rev], include=[f\"/data/{include_name}/**\"], progress=ANY\n        )\n\n\n@pytest.mark.parametrize(\"basename, include_name\", lfs_prefetch_params)\ndef test_lfs_prefetch_file(tmp_dir, dvc, scm, mocker, basename, include_name):\n    mock_fetch = mocker.patch(\"scmrepo.git.lfs.fetch\")\n    tmp_dir.scm_gen(\n        {\n            \".gitattributes\": \"data/**/* 
filter=lfs diff=lfs merge=lfs -text\",\n            f\"data/{basename}.txt\": \"test data\",\n        },\n        commit=\"init lfs\",\n    )\n    rev = scm.get_rev()\n    with dvc.switch(rev):\n        download(dvc.dvcfs, f\"data/{basename}.txt\", \"data\")\n        mock_fetch.assert_called_once_with(\n            scm, [rev], include=[f\"/data/{include_name}.txt\"], progress=ANY\n        )\n"
  },
  {
    "path": "tests/func/test_du.py",
    "content": "import os\n\n\ndef test_du(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"file\": b\"file\",\n            \"dvcfile\": b\"dvcfile\",\n            \"dir\": {\n                \"dirfile\": b\"dirfile\",\n                \"subdir\": {\n                    \"subdirfile\": b\"subdirfile\",\n                },\n                \"dvcsubdir\": {\n                    \"dvcsubdirfile\": b\"dvcsubdirfile\",\n                },\n            },\n        }\n    )\n\n    dvc.add(\"dvcfile\")\n    dvc.add(os.path.join(\"dir\", \"dvcsubdir\"))\n\n    assert dvc.du(\".\", \"file\") == [(\"file\", 4)]\n    assert dvc.du(\".\", \"dvcfile\") == [(\"dvcfile\", 7)]\n    assert set(dvc.du(\".\", \"dir/subdir\")) == {\n        (\"dir/subdir/subdirfile\", 10),\n        (\"dir/subdir\", 10),\n    }\n    assert dvc.du(\".\", \"dir/subdir\", summarize=True) == [(\"dir/subdir\", 10)]\n    assert set(dvc.du(\".\", \"dir/dvcsubdir\")) == {\n        (\"dir/dvcsubdir/dvcsubdirfile\", 13),\n        (\"dir/dvcsubdir\", 13),\n    }\n    assert dvc.du(\".\", \"dir/dvcsubdir\", summarize=True) == [(\"dir/dvcsubdir\", 13)]\n    assert set(dvc.du(\".\", \"dir\")) == {\n        (\"dir/dvcsubdir\", 13),\n        (\"dir/subdir\", 10),\n        (\"dir/dirfile\", 7),\n        (\"dir\", 30),\n    }\n    assert dvc.du(\".\", \"dir\", summarize=True) == [(\"dir\", 30)]\n    assert set(dvc.du(\".\", \"/\")) == {\n        (\"/dvcfile\", 7),\n        (\"/dir\", 30),\n        (\"/file\", 4),\n        (\"/\", 41),\n    }\n    assert dvc.du(\".\", \"/\", summarize=True) == [(\"/\", 41)]\n"
  },
  {
    "path": "tests/func/test_dvcfile.py",
    "content": "import textwrap\n\nimport pytest\n\nfrom dvc.annotations import Annotation\nfrom dvc.dvcfile import (\n    LOCK_FILE,\n    PROJECT_FILE,\n    ParametrizedDumpError,\n    SingleStageFile,\n    load_file,\n)\nfrom dvc.stage.exceptions import StageFileDoesNotExistError\nfrom dvc.stage.loader import StageNotFound\nfrom dvc.utils.strictyaml import YAMLValidationError\n\nSTAGE_EXAMPLE = {\n    \"stage1\": {\n        \"cmd\": \"cp foo bar\",\n        \"desc\": \"stage desc\",\n        \"meta\": {\"key1\": \"value1\", \"key2\": \"value2\"},\n        \"deps\": [\"foo\"],\n        \"outs\": [{\"bar\": {\"desc\": \"bar desc\", \"meta\": {\"key\": \"value\"}}}],\n    }\n}\n\n\ndef test_run_load_one_for_multistage(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage1 = dvc.run(\n        cmd=\"cp foo foo2\",\n        deps=[\"foo\"],\n        name=\"copy-foo-foo2\",\n        outs_persist_no_cache=[\"foo2\"],\n        always_changed=True,\n    )\n    stage2 = load_file(dvc, PROJECT_FILE).stages[\"copy-foo-foo2\"]\n    assert stage1 == stage2\n    foo_out = stage2.outs[0]\n    assert stage2.cmd == \"cp foo foo2\"\n    assert stage2.name == \"copy-foo-foo2\"\n    assert foo_out.def_path == \"foo2\"\n    assert foo_out.persist\n    assert not foo_out.use_cache\n    assert stage2.deps[0].def_path == \"foo\"\n    assert dvc.reproduce(\":copy-foo-foo2\")\n\n\ndef test_run_load_one_for_multistage_non_existing(tmp_dir, dvc):\n    with pytest.raises(StageFileDoesNotExistError):\n        assert load_file(dvc, PROJECT_FILE).stages.get(\"copy-foo-foo2\")\n\n\ndef test_run_load_one_for_multistage_non_existing_stage_name(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = dvc.run(\n        cmd=\"cp foo foo2\",\n        deps=[\"foo\"],\n        name=\"copy-foo-foo2\",\n        metrics=[\"foo2\"],\n        always_changed=True,\n    )\n    with pytest.raises(StageNotFound):\n        assert load_file(dvc, stage.path).stages[\"random-name\"]\n\n\ndef 
test_run_load_one_on_single_stage(tmp_dir, dvc):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert isinstance(load_file(dvc, stage.path), SingleStageFile)\n    assert load_file(dvc, stage.path).stages.get(\"random-name\") == stage\n    assert load_file(dvc, stage.path).stage == stage\n\n\ndef test_has_stage_with_name(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.run(\n        cmd=\"cp foo foo2\",\n        deps=[\"foo\"],\n        name=\"copy-foo-foo2\",\n        metrics=[\"foo2\"],\n        always_changed=True,\n    )\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    assert \"copy-foo-foo2\" in dvcfile.stages\n    assert \"copy\" not in dvcfile.stages\n\n\ndef test_load_all_multistage(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage1 = dvc.run(\n        cmd=\"cp foo foo2\",\n        deps=[\"foo\"],\n        name=\"copy-foo-foo2\",\n        metrics=[\"foo2\"],\n        always_changed=True,\n    )\n    stages = load_file(dvc, PROJECT_FILE).stages.values()\n    assert len(stages) == 1\n    assert list(stages) == [stage1]\n\n    tmp_dir.gen(\"bar\", \"bar\")\n    stage2 = dvc.run(\n        cmd=\"cp bar bar2\",\n        deps=[\"bar\"],\n        name=\"copy-bar-bar2\",\n        metrics=[\"bar2\"],\n        always_changed=True,\n    )\n    assert set(load_file(dvc, PROJECT_FILE).stages.values()) == {stage2, stage1}\n\n\ndef test_load_all_singlestage(tmp_dir, dvc):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvcfile = load_file(dvc, \"foo.dvc\")\n    assert isinstance(dvcfile, SingleStageFile)\n    assert len(dvcfile.stages) == 1\n    stages = dvcfile.stages.values()\n    assert len(stages) == 1\n    assert list(stages) == [stage1]\n\n\ndef test_try_get_single_stage_from_pipeline_file(tmp_dir, dvc):\n    from dvc.dvcfile import DvcException\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.run(\n        cmd=\"cp foo foo2\",\n        deps=[\"foo\"],\n        name=\"copy-foo-foo2\",\n        metrics=[\"foo2\"],\n        
always_changed=True,\n    )\n    with pytest.raises(DvcException):\n        assert load_file(dvc, PROJECT_FILE).stage\n\n\ndef test_stage_collection(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"dir\": {\"file1\": \"file1\", \"file2\": \"file2\"},\n            \"foo\": \"foo\",\n            \"bar\": \"bar\",\n        }\n    )\n    (stage1,) = dvc.add(\"dir\")\n    stage2 = dvc.run(\n        cmd=\"cp foo foo2\",\n        deps=[\"foo\"],\n        name=\"copy-foo-foo2\",\n        metrics=[\"foo2\"],\n        always_changed=True,\n    )\n    assert set(dvc.index.stages) == {stage1, stage2}\n\n\ndef test_remove_stage(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    stage2 = run_copy(\"bar\", \"foobar\", name=\"copy-bar-foobar\")\n\n    dvc_file = load_file(dvc, PROJECT_FILE)\n    assert dvc_file.exists()\n    assert {\"copy-bar-foobar\", \"copy-foo-bar\"} == set(\n        dvc_file._load()[0][\"stages\"].keys()\n    )\n\n    dvc_file.remove_stage(stage)\n\n    assert list(dvc_file._load()[0][\"stages\"].keys()) == [\"copy-bar-foobar\"]\n\n    # sanity check\n    stage2.reload()\n\n    # re-check to see if it fails if there's no stage entry\n    dvc_file.remove_stage(stage)\n    dvc_file.remove(force=True)\n    # should not fail when there's no file at all.\n    dvc_file.remove_stage(stage)\n\n\ndef test_remove_stage_lockfile(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    stage2 = run_copy(\"bar\", \"foobar\", name=\"copy-bar-foobar\")\n\n    dvc_file = load_file(dvc, PROJECT_FILE)\n    lock_file = dvc_file._lockfile\n    assert dvc_file.exists()\n    assert lock_file.exists()\n    assert {\"copy-bar-foobar\", \"copy-foo-bar\"} == set(lock_file.load()[\"stages\"].keys())\n    lock_file.remove_stage(stage)\n\n    assert list(lock_file.load()[\"stages\"].keys()) == [\"copy-bar-foobar\"]\n\n    # sanity 
check\n    stage2.reload()\n\n    # re-check to see if it fails if there's no stage entry\n    lock_file.remove_stage(stage)\n    lock_file.remove()\n    # should not fail when there's no file at all.\n    lock_file.remove_stage(stage)\n\n\ndef test_remove_stage_dvcfiles(tmp_dir, dvc, run_copy):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    dvc_file = load_file(dvc, stage.path)\n    assert dvc_file.exists()\n    dvc_file.remove_stage(stage)\n    assert not dvc_file.exists()\n\n    # re-check to see if it fails if there's no stage entry\n    dvc_file.remove_stage(stage)\n    dvc_file.remove(force=True)\n\n    # should not fail when there's no file at all.\n    dvc_file.remove_stage(stage)\n\n\ndef test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    dvc_file = load_file(dvc, stage.path)\n    lock_file = dvc_file._lockfile\n\n    data = dvc_file._load()[0]\n    lock_data = lock_file.load()\n    lock_data[\"gibberish\"] = True\n    data[\"gibberish\"] = True\n    (tmp_dir / lock_file.relpath).dump(lock_data)\n    with pytest.raises(YAMLValidationError):\n        dvc_file.remove_stage(stage)\n\n    lock_file.remove()\n    dvc_file.dump(stage, update_pipeline=False)\n\n    (tmp_dir / dvc_file.relpath).dump(data)\n    with pytest.raises(YAMLValidationError):\n        dvc_file.remove_stage(stage)\n\n\ndef test_remove_stage_preserves_comment(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\n        \"dvc.yaml\",\n        textwrap.dedent(\n            \"\"\"\\\n            stages:\n                generate-foo:\n                    cmd: \"echo foo > foo\"\n                    # This copies 'foo' text to 'foo' file.\n                    outs:\n                    - foo\n                copy-foo-bar:\n                    cmd: \"python copy.py foo bar\"\n                    deps:\n                    - foo\n                    outs:\n                    - 
bar\"\"\"\n        ),\n    )\n\n    dvc.reproduce(PROJECT_FILE)\n\n    dvc_file = load_file(dvc, PROJECT_FILE)\n\n    assert dvc_file.exists()\n    assert (tmp_dir / LOCK_FILE).exists()\n    assert (tmp_dir / \"foo\").exists()\n    assert (tmp_dir / \"bar\").exists()\n\n    dvc_file.remove_stage(dvc_file.stages[\"copy-foo-bar\"])\n    assert (\n        \"# This copies 'foo' text to 'foo' file.\"\n        in (tmp_dir / PROJECT_FILE).read_text()\n    )\n\n\ndef test_remove_stage_removes_dvcfiles_if_no_stages_left(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"run_copy\")\n\n    dvc_file = load_file(dvc, PROJECT_FILE)\n\n    assert dvc_file.exists()\n    assert (tmp_dir / LOCK_FILE).exists()\n    assert (tmp_dir / \"foo\").exists()\n\n    dvc_file.remove_stage(dvc_file.stages[\"run_copy\"])\n    assert not dvc_file.exists()\n    assert not (tmp_dir / LOCK_FILE).exists()\n\n\ndef test_dvcfile_dump_preserves_meta(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"run_copy\")\n    dvcfile = stage.dvcfile\n\n    data = dvcfile._load()[0]\n    metadata = {\"name\": \"copy-file\"}\n    stage.meta = metadata\n    data[\"stages\"][\"run_copy\"][\"meta\"] = metadata\n\n    dvcfile.dump(stage)\n    assert dvcfile._load()[0] == data\n    assert dvcfile._load()[0][\"stages\"][\"run_copy\"][\"meta\"] == metadata\n\n\ndef test_dvcfile_dump_preserves_desc(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage_desc = \"test stage description\"\n    out_desc = \"test out description\"\n\n    stage = run_copy(\"foo\", \"bar\", name=\"run_copy\", desc=stage_desc)\n    dvcfile = stage.dvcfile\n\n    data = dvcfile._load()[0]\n    data[\"stages\"][\"run_copy\"][\"outs\"][0] = {\"bar\": {\"desc\": out_desc}}\n    (tmp_dir / dvcfile.path).dump(data)\n\n    assert stage.desc == stage_desc\n    stage.outs[0].annot.desc = out_desc\n    dvcfile.dump(stage)\n    loaded = 
dvcfile._load()[0]\n    assert loaded == data\n    assert loaded[\"stages\"][\"run_copy\"][\"desc\"] == stage_desc\n    assert loaded[\"stages\"][\"run_copy\"][\"outs\"][0][\"bar\"][\"desc\"] == out_desc\n\n\ndef test_dvcfile_dump_preserves_comments(tmp_dir, dvc):\n    text = textwrap.dedent(\n        \"\"\"\\\n        stages:\n          generate-foo:\n            cmd: echo foo > foo\n            # This copies 'foo' text to 'foo' file.\n            outs:\n            - foo\"\"\"\n    )\n    tmp_dir.gen(\"dvc.yaml\", text)\n    stage = dvc.stage.load_one(name=\"generate-foo\")\n    stage.outs[0].use_cache = False\n    dvcfile = stage.dvcfile\n\n    dvcfile.dump(stage)\n    assert dvcfile._load()[1] == (text + \":\\n\\tcache: false\\n\".expandtabs())\n\n\n@pytest.mark.parametrize(\n    \"data, name\",\n    [\n        ({\"build-us\": {\"cmd\": \"echo ${foo}\"}}, \"build-us\"),\n        (\n            {\"build\": {\"foreach\": [\"us\", \"gb\"], \"do\": {\"cmd\": \"echo ${foo}\"}}},\n            \"build@us\",\n        ),\n    ],\n)\ndef test_dvcfile_try_dumping_parametrized_stage(tmp_dir, dvc, data, name):\n    (tmp_dir / \"dvc.yaml\").dump({\"stages\": data, \"vars\": [{\"foo\": \"foobar\"}]})\n\n    stage = dvc.stage.load_one(name=name)\n    dvcfile = stage.dvcfile\n\n    with pytest.raises(ParametrizedDumpError) as exc:\n        dvcfile.dump(stage)\n\n    assert str(exc.value) == f\"cannot dump a parametrized stage: '{name}'\"\n\n\ndef test_dvcfile_load_dump_stage_with_desc_meta(tmp_dir, dvc):\n    data = {\"stages\": STAGE_EXAMPLE}\n    (tmp_dir / \"dvc.yaml\").dump(data)\n\n    stage = dvc.stage.load_one(name=\"stage1\")\n    assert stage.meta == {\"key1\": \"value1\", \"key2\": \"value2\"}\n    assert stage.desc == \"stage desc\"\n    assert stage.outs[0].annot == Annotation(desc=\"bar desc\", meta={\"key\": \"value\"})\n\n    # sanity check\n    stage.dump()\n    assert (tmp_dir / \"dvc.yaml\").parse() == data\n\n\ndef test_dvcfile_load_with_plots(tmp_dir, 
dvc):\n    (tmp_dir / \"dvc.yaml\").dump(\n        {\n            \"plots\": [\n                {\"path/to/plot\": {\"x\": \"value\", \"y\": \"value\"}},\n                {\"path/to/another/plot\": {\"x\": \"value\", \"y\": \"value\"}},\n                {\"path/to/empty/plot\": None},\n                \"path/to/plot/str\",\n            ],\n            \"stages\": STAGE_EXAMPLE,\n        },\n    )\n    plots = list(dvc.plots.collect())\n    top_level_plots = plots[0][\"workspace\"][\"definitions\"][\"data\"][\"dvc.yaml\"][\"data\"]\n    assert all(\n        name in top_level_plots for name in (\"path/to/plot\", \"path/to/another/plot\")\n    )\n\n\ndef test_dvcfile_dos2unix(tmp_dir, dvc):\n    from dvc_data.hashfile.hash import HashInfo\n\n    (tmp_dir / \"foo.dvc\").dump({\"outs\": [{\"md5\": \"abc123\", \"size\": 3, \"path\": \"foo\"}]})\n    orig_content = (tmp_dir / \"foo.dvc\").read_text()\n    stage = dvc.stage.load_one(\"foo.dvc\")\n    assert stage.outs[0].hash_name == \"md5-dos2unix\"\n    assert stage.outs[0].hash_info == HashInfo(\"md5-dos2unix\", \"abc123\")\n    stage.dump()\n    assert (tmp_dir / \"foo.dvc\").read_text() == orig_content\n"
  },
  {
    "path": "tests/func/test_external_repo.py",
    "content": "import os\n\nfrom dvc.repo.open_repo import CLONES\nfrom dvc.repo.open_repo import _external_repo as external_repo\nfrom dvc.scm import Git\nfrom dvc.testing.tmp_dir import make_subrepo\nfrom dvc.utils import relpath\nfrom dvc.utils.fs import remove\nfrom dvc_data.hashfile.build import build\nfrom dvc_data.hashfile.transfer import transfer\n\n\ndef test_external_repo(erepo_dir, mocker):\n    with erepo_dir.chdir():\n        with erepo_dir.branch(\"branch\", new=True):\n            erepo_dir.dvc_gen(\"file\", \"branch\", commit=\"create file on branch\")\n        erepo_dir.dvc_gen(\"file\", \"master\", commit=\"create file on master\")\n\n    url = os.fspath(erepo_dir)\n\n    clone_spy = mocker.spy(Git, \"clone\")\n\n    with external_repo(url) as repo:\n        with repo.dvcfs.open(\"file\") as fd:\n            assert fd.read() == \"master\"\n\n    with external_repo(url, rev=\"branch\") as repo:\n        with repo.dvcfs.open(\"file\") as fd:\n            assert fd.read() == \"branch\"\n\n    assert clone_spy.call_count == 1\n\n\ndef test_source_change(erepo_dir):\n    url = os.fspath(erepo_dir)\n    with external_repo(url) as repo:\n        old_rev = repo.scm.get_rev()\n\n    erepo_dir.scm_gen(\"file\", \"text\", commit=\"a change\")\n\n    with external_repo(url) as repo:\n        new_rev = repo.scm.get_rev()\n\n    assert old_rev != new_rev\n\n\ndef test_cache_reused(erepo_dir, mocker, local_cloud):\n    from dvc_objects.fs import generic\n\n    erepo_dir.add_remote(config=local_cloud.config)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"text\", commit=\"add file\")\n    erepo_dir.dvc.push()\n\n    download_spy = mocker.spy(generic, \"transfer\")\n\n    # Use URL to prevent any fishy optimizations\n    url = f\"file://{erepo_dir.as_posix()}\"\n    with external_repo(url) as repo:\n        repo.fetch()\n        assert download_spy.mock.call_count == 1\n\n    # Should not download second time\n    
erepo_dir.scm.branch(\"branch\")\n    with external_repo(url, \"branch\") as repo:\n        repo.fetch()\n        assert download_spy.mock.call_count == 1\n\n\ndef test_known_sha(erepo_dir):\n    erepo_dir.scm.commit(\"init\")\n\n    url = f\"file://{erepo_dir.as_posix()}\"\n    with external_repo(url) as repo:\n        rev = repo.scm.get_rev()\n        prev_rev = repo.scm.resolve_rev(\"HEAD^\")\n\n    # Hits cache\n    with external_repo(url, rev) as repo:\n        pass\n\n    # No clone, no pull, copies a repo, checks out the known sha\n    with external_repo(url, prev_rev) as repo:\n        pass\n\n\ndef test_pull_subdir_file(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        subdir = erepo_dir / \"subdir\"\n        subdir.mkdir()\n        (subdir / \"file\").write_text(\"contents\")\n        erepo_dir.dvc_add(subdir / \"file\", commit=\"create file\")\n\n    dest = tmp_dir / \"file\"\n    with external_repo(os.fspath(erepo_dir)) as repo:\n        repo.dvcfs.get(\"subdir/file\", os.fspath(dest))\n\n    assert dest.is_file()\n    assert dest.read_text() == \"contents\"\n\n\ndef test_relative_remote(erepo_dir, tmp_dir):\n    # these steps reproduce the script on this issue:\n    # https://github.com/treeverse/dvc/issues/2756\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"contents\", commit=\"create file\")\n\n    upstream_dir = tmp_dir\n    upstream_url = relpath(upstream_dir, erepo_dir)\n    erepo_dir.add_remote(url=upstream_url)\n\n    erepo_dir.dvc.push()\n\n    (erepo_dir / \"file\").unlink()\n    remove(erepo_dir.dvc.cache.local.path)\n\n    url = os.fspath(erepo_dir)\n\n    with external_repo(url) as repo:\n        assert os.path.isabs(repo.config[\"remote\"][\"upstream\"][\"url\"])\n        assert os.path.isdir(repo.config[\"remote\"][\"upstream\"][\"url\"])\n        with repo.dvcfs.open(\"file\") as fd:\n            assert fd.read() == \"contents\"\n\n\ndef test_shallow_clone_branch(erepo_dir, mocker):\n    with 
erepo_dir.chdir():\n        with erepo_dir.branch(\"branch\", new=True):\n            erepo_dir.dvc_gen(\"file\", \"branch\", commit=\"create file on branch\")\n        erepo_dir.dvc_gen(\"file\", \"master\", commit=\"create file on master\")\n\n    url = os.fspath(erepo_dir)\n    clone_spy = mocker.spy(Git, \"clone\")\n\n    with external_repo(url, rev=\"branch\") as repo:\n        with repo.dvcfs.open(\"file\") as fd:\n            assert fd.read() == \"branch\"\n\n    clone_spy.assert_called_with(\n        url, mocker.ANY, shallow_branch=\"branch\", progress=mocker.ANY\n    )\n\n    path, _ = CLONES[url]\n    CLONES[url] = (path, True)\n\n    mock_fetch = mocker.patch.object(Git, \"fetch\")\n    with external_repo(url) as repo:\n        with repo.dvcfs.open(\"file\") as fd:\n            assert fd.read() == \"master\"\n    mock_fetch.assert_called_with(unshallow=True)\n\n\ndef test_shallow_clone_tag(erepo_dir, mocker):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"foo\", commit=\"init\")\n        erepo_dir.scm.tag(\"v1\")\n        erepo_dir.dvc_gen(\"file\", \"bar\", commit=\"update file\")\n\n    url = os.fspath(erepo_dir)\n\n    clone_spy = mocker.spy(Git, \"clone\")\n    with external_repo(url, rev=\"v1\") as repo:\n        with repo.dvcfs.open(\"file\") as fd:\n            assert fd.read() == \"foo\"\n\n    clone_spy.assert_called_with(\n        url, mocker.ANY, shallow_branch=\"v1\", progress=mocker.ANY\n    )\n\n    path, _ = CLONES[url]\n    CLONES[url] = (path, True)\n\n    mock_fetch = mocker.patch.object(Git, \"fetch\")\n    with external_repo(url, rev=\"master\") as repo:\n        with repo.dvcfs.open(\"file\") as fd:\n            assert fd.read() == \"bar\"\n    mock_fetch.assert_called_with(unshallow=True)\n\n\ndef test_subrepos_are_ignored(tmp_dir, erepo_dir):\n    subrepo = erepo_dir / \"dir\" / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"dir/foo\", \"foo\", 
commit=\"foo\")\n        erepo_dir.scm_gen(\"dir/bar\", \"bar\", commit=\"bar\")\n\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"file\": \"file\"}, commit=\"add files on subrepo\")\n\n    with external_repo(os.fspath(erepo_dir)) as repo:\n        repo.dvcfs.get(\"dir\", os.fspath(tmp_dir / \"out\"))\n        expected_files = {\"foo\": \"foo\", \"bar\": \"bar\", \".gitignore\": \"/foo\\n\"}\n        assert (tmp_dir / \"out\").read_text() == expected_files\n\n        # clear cache to test saving to cache\n        cache_dir = tmp_dir / repo.cache.local.path\n        remove(cache_dir)\n        os.makedirs(cache_dir)\n\n        staging, _, obj = build(\n            repo.cache.local,\n            \"dir\",\n            repo.dvcfs,\n            \"md5\",\n            ignore=repo.dvcignore,\n        )\n        transfer(\n            staging,\n            repo.cache.local,\n            {obj.hash_info},\n            shallow=False,\n            hardlink=True,\n        )\n        if os.name == \"nt\":\n            expected_gitignore_path = (\n                cache_dir / \"d5\" / \"61e684092f0ff908aa82ee9cc1e594\"\n            )\n            expected_dir_path = cache_dir / \"0d\" / \"2086760aea091f1504eafc8843bb18.dir\"\n        else:\n            expected_gitignore_path = (\n                cache_dir / \"94\" / \"7d2b84e5aa88170e80dff467a5bfb6\"\n            )\n            expected_dir_path = cache_dir / \"e1\" / \"d9e8eae5374860ae025ec84cfd85c7.dir\"\n        assert set(cache_dir.glob(\"??/*\")) == {\n            expected_dir_path,\n            expected_gitignore_path,\n            cache_dir / \"37\" / \"b51d194a7513e45b56f6524f2d51f2\",\n            cache_dir / \"ac\" / \"bd18db4cc2f85cedef654fccc4a4d8\",\n        }\n\n\ndef test_subrepos_are_ignored_for_git_tracked_dirs(tmp_dir, erepo_dir):\n    subrepo = erepo_dir / \"dir\" / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    with erepo_dir.chdir():\n        scm_files = {\"foo\": \"foo\", \"bar\": \"bar\", 
\"subdir\": {\"lorem\": \"lorem\"}}\n        erepo_dir.scm_gen({\"dir\": scm_files}, commit=\"add scm dir\")\n\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"file\": \"file\"}, commit=\"add files on subrepo\")\n\n    with external_repo(os.fspath(erepo_dir)) as repo:\n        repo.dvcfs.get(\"dir\", os.fspath(tmp_dir / \"out\"))\n        # subrepo files should not be here\n        assert (tmp_dir / \"out\").read_text() == scm_files\n"
  },
  {
    "path": "tests/func/test_fs.py",
    "content": "import os\nfrom operator import itemgetter\n\nfrom dvc.repo import Repo\n\n\ndef test_cleanfs_subrepo(tmp_dir, dvc, scm, monkeypatch):\n    tmp_dir.gen({\"subdir\": {}})\n    subrepo_dir = tmp_dir / \"subdir\"\n    with subrepo_dir.chdir():\n        subrepo = Repo.init(subdir=True)\n        subrepo_dir.gen({\"foo\": \"foo\", \"dir\": {\"bar\": \"bar\"}})\n\n    path = subrepo_dir.fs_path\n\n    assert dvc.fs.exists(dvc.fs.join(path, \"foo\"))\n    assert dvc.fs.isfile(dvc.fs.join(path, \"foo\"))\n    assert dvc.fs.exists(dvc.fs.join(path, \"dir\"))\n    assert dvc.fs.isdir(dvc.fs.join(path, \"dir\"))\n\n    assert subrepo.fs.exists(subrepo.fs.join(path, \"foo\"))\n    assert subrepo.fs.isfile(subrepo.fs.join(path, \"foo\"))\n    assert subrepo.fs.exists(subrepo.fs.join(path, \"dir\"))\n    assert subrepo.fs.isdir(subrepo.fs.join(path, \"dir\"))\n\n\ndef test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"add foo\")\n    subrepo_dir = tmp_dir / \"subdir\"\n    subrepo_dir.mkdir()\n    with subrepo_dir.chdir():\n        Repo.init(subdir=True)\n    scm.add([\"subdir\"])\n    scm.commit(\"Add subrepo\")\n\n    dvc_fs = dvc.fs\n    dvc._reset()\n    scm_fs = scm.get_fs(\"HEAD\")\n    path = os.fspath(tmp_dir)\n    get_dirs = itemgetter(1)\n\n    assert set(get_dirs(next(dvc_fs.walk(path)))) == {\".dvc\", \"subdir\", \".git\"}\n    assert set(get_dirs(next(scm_fs.walk(\"/\")))) == {\".dvc\", \"subdir\"}\n"
  },
  {
    "path": "tests/func/test_gc.py",
    "content": "import datetime\nimport logging\nimport os\nimport shutil\nimport textwrap\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.exceptions import CollectCacheError, InvalidArgumentError, RevCollectionError\nfrom dvc.fs import LocalFileSystem\nfrom dvc.utils.fs import remove\nfrom dvc_data.hashfile.db.local import LocalHashFileDB\n\n\n@pytest.fixture\ndef good_and_bad_cache(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    (stage,) = tmp_dir.dvc_gen(\n        \"data\",\n        {\"sub\": {\"data_sub\": \"data_sub\", \"data\": \"data\", \"тест\": \"проверка\"}},\n    )\n    raw_dir_hash = stage.outs[0].hash_info.as_raw().value\n    odb = dvc.cache.local\n\n    bad_cache = {raw_dir_hash}\n    for i in [\"123\", \"234\", \"345\"]:\n        odb.add_bytes(i, i.encode(\"utf8\"))\n        bad_cache.add(i)\n\n    good_cache = {md5 for md5 in odb.all() if md5 not in bad_cache}\n    return good_cache, bad_cache\n\n\ndef test_gc_api(dvc, good_and_bad_cache):\n    dvc.gc(workspace=True)\n    odb = dvc.cache.local\n    good_cache, _ = good_and_bad_cache\n    assert set(odb.all()) == good_cache\n\n\ndef test_gc_cli(dvc, good_and_bad_cache):\n    assert main([\"gc\", \"-wf\"]) == 0\n    odb = dvc.cache.local\n    good_cache, _ = good_and_bad_cache\n    assert set(odb.all()) == good_cache\n\n\ndef test_gc_branches_tags(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\"file\", \"v1.0\", commit=\"v1.0\")\n    scm.tag(\"v1.0\")\n\n    with tmp_dir.branch(\"test\", new=True):\n        dvc.remove(\"file.dvc\")\n        tmp_dir.dvc_gen(\"file\", \"test\", commit=\"test\")\n\n    dvc.remove(\"file.dvc\")\n    tmp_dir.dvc_gen(\"file\", \"trash\", commit=\"trash\")\n\n    dvc.remove(\"file.dvc\")\n    tmp_dir.dvc_gen(\"file\", \"master\", commit=\"trash\")\n\n    odb = dvc.cache.local\n    assert len(list(odb.all())) == 4\n\n    dvc.gc(all_tags=True, all_branches=True)\n    assert len(list(odb.all())) == 3\n\n    dvc.gc(all_tags=False, all_branches=True)\n    assert 
len(list(odb.all())) == 2\n\n    dvc.gc(all_tags=True, all_branches=False)\n    assert len(list(odb.all())) == 1\n\n\ndef test_gc_multiple_dvc_repos(tmp_dir, scm, dvc, erepo_dir):\n    tmp_dir.dvc_gen(\"only_in_first\", \"only in main repo\")\n    tmp_dir.dvc_gen(\"in_both\", \"in both repos\")\n\n    erepo_dir.dvc.cache.local.path = dvc.cache.local.path\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"in_both\", \"in both repos\")\n        erepo_dir.dvc_gen(\"only_in_second\", \"only in additional repo\")\n\n    odb = dvc.cache.local\n    assert len(list(odb.all())) == 3\n\n    dvc.gc(repos=[erepo_dir], workspace=True)\n    assert len(list(odb.all())) == 3\n\n    dvc.gc(workspace=True)\n    assert len(list(odb.all())) == 2\n\n\ndef test_gc_multiple_dvc_repos_all_branches_num(tmp_dir, scm, dvc, erepo_dir):\n    tmp_dir.dvc_gen(\"main\", \"main\", commit=\"main\")\n\n    erepo_dir.dvc.cache.local.path = dvc.cache.local.path\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(\"base\", \"base\", commit=\"base\")\n        with erepo_dir.branch(\"feature\", new=True):\n            erepo_dir.dvc_gen(\"shared\", \"feature-v1\", commit=\"feature-v1\")\n            erepo_dir.dvc.remove(\"shared.dvc\")\n            erepo_dir.dvc_gen(\"shared\", \"feature-v2\", commit=\"feature-v2\")\n\n    odb = dvc.cache.local\n    assert len(list(odb.all())) == 3\n\n    dvc.gc(all_branches=True, num=2, repos=[erepo_dir])\n    assert len(list(odb.all())) == 3\n\n    dvc.gc(all_branches=True, repos=[erepo_dir])\n    assert len(list(odb.all())) == 2\n\n\ndef test_all_commits(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"testfile\", \"uncommitted\")\n    tmp_dir.dvc_gen(\"testfile\", \"committed\", commit=\"committed\")\n    tmp_dir.dvc_gen(\"testfile\", \"modified\", commit=\"modified\")\n    tmp_dir.dvc_gen(\"testfile\", \"workspace\")\n\n    n = _count_files(dvc.cache.local.path)\n    dvc.gc(all_commits=True)\n\n    # Only one uncommitted file should go away\n    assert 
_count_files(dvc.cache.local.path) == n - 1\n\n\ndef test_gc_no_dir_cache(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    (dir_stage,) = tmp_dir.dvc_gen({\"dir\": {\"x\": \"x\", \"subdir\": {\"y\": \"y\"}}})\n\n    remove(dir_stage.outs[0].cache_path)\n\n    with pytest.raises(RevCollectionError) as exc:\n        dvc.gc(workspace=True)\n    assert type(exc.value.__cause__) is CollectCacheError\n\n    assert _count_files(dvc.cache.local.path) == 4\n    dvc.gc(force=True, workspace=True)\n    assert _count_files(dvc.cache.local.path) == 2\n\n\ndef _count_files(path):\n    return sum(len(files) for _, _, files in os.walk(path))\n\n\ndef test_gc_no_unpacked_dir(tmp_dir, dvc):\n    dir_stages = tmp_dir.dvc_gen({\"dir\": {\"file\": \"text\"}})\n    dvc.status()\n\n    os.remove(\"dir.dvc\")\n    unpackeddir = dir_stages[0].outs[0].cache_path + LocalHashFileDB.UNPACKED_DIR_SUFFIX\n\n    # older (pre 1.0) versions of dvc used to generate this dir\n    shutil.copytree(\"dir\", unpackeddir)\n    assert os.path.exists(unpackeddir)\n\n    dvc.gc(force=True, workspace=True)\n    assert not os.path.exists(unpackeddir)\n\n\ndef test_gc_without_workspace_raises_error(tmp_dir, dvc):\n    dvc.gc(force=True, workspace=True)  # works without error\n\n    from dvc.exceptions import InvalidArgumentError\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.gc(force=True)\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.gc(force=True, workspace=False)\n\n\ndef test_gc_cloud_with_or_without_specifier(tmp_dir, erepo_dir, local_cloud):\n    erepo_dir.add_remote(config=local_cloud.config)\n    dvc = erepo_dir.dvc\n    from dvc.exceptions import InvalidArgumentError\n\n    with pytest.raises(InvalidArgumentError):\n        dvc.gc(force=True, cloud=True)\n\n    dvc.gc(cloud=True, all_tags=True)\n    dvc.gc(cloud=True, all_commits=True)\n    dvc.gc(cloud=True, all_branches=True)\n    dvc.gc(cloud=True, all_commits=False, all_branches=True, 
all_tags=True)\n\n\ndef test_gc_without_workspace_on_tags_branches_commits(tmp_dir, dvc):\n    dvc.gc(force=True, all_tags=True)\n    dvc.gc(force=True, all_commits=True)\n    dvc.gc(force=False, all_branches=True)\n\n    # even if workspace is disabled, and others are enabled, assume as if\n    # workspace is enabled.\n    dvc.gc(force=False, all_branches=True, all_commits=False, workspace=False)\n\n\n@pytest.mark.parametrize(\"cloud\", [\"c\", \"\"])\ndef test_gc_without_workspace(tmp_dir, dvc, caplog, cloud):\n    with caplog.at_level(logging.WARNING, logger=\"dvc\"):\n        assert main([\"gc\", f\"-{cloud}vf\"]) == 255\n\n    assert (\n        \"Either of `-w|--workspace`, `-a|--all-branches`, `-T|--all-tags` \"\n        \"`--all-experiments`, `--all-commits`, `--date` or `--rev` \"\n        \"needs to be set.\" in caplog.text\n    )\n\n\ndef test_gc_with_possible_args_positive(tmp_dir, dvc):\n    for flag in [\"-w\", \"-a\", \"-T\", \"--all-commits\", \"-aT\", \"-wa\", \"-waT\"]:\n        assert main([\"gc\", \"-vf\", flag]) == 0\n\n\ndef test_gc_cloud_positive(tmp_dir, dvc, tmp_path_factory, local_remote):\n    for flag in [\"-cw\", \"-ca\", \"-cT\", \"-caT\", \"-cwT\"]:\n        assert main([\"gc\", \"-vf\", flag]) == 0\n\n\ndef test_gc_cloud_remove_order(tmp_dir, scm, dvc, mocker, local_remote):\n    (standalone, dir1, dir2) = tmp_dir.dvc_gen(\n        {\n            \"file1\": \"standalone\",\n            \"dir1\": {\"file2\": \"file2\"},\n            \"dir2\": {\"file3\": \"file3\", \"file4\": \"file4\"},\n        }\n    )\n    dvc.push()\n    dvc.remove(standalone.relpath)\n    dvc.remove(dir1.relpath)\n    dvc.remove(dir2.relpath)\n    dvc.gc(workspace=True)\n\n    mocked_remove = mocker.patch.object(LocalFileSystem, \"remove\", autospec=True)\n    dvc.gc(workspace=True, cloud=True)\n    assert len(mocked_remove.mock_calls) == 4\n    # Unpacked dir should be the first removed\n    for args in mocked_remove.call_args_list[:2]:\n        checksum = 
str(args[0][1])\n        assert checksum.endswith(\".dir.unpacked\")\n    # Then, bulk remove should be applied\n\n    # First to `.dir`\n    checksums = mocked_remove.call_args_list[2][0][1]\n    assert isinstance(checksums, list)\n    assert all(x.endswith(\".dir\") for x in checksums)\n    # And later to individual files\n    checksums = mocked_remove.call_args_list[3][0][1]\n    assert isinstance(checksums, list)\n    assert not any(x.endswith(\".dir\") for x in checksums)\n\n\ndef test_gc_not_collect_pipeline_tracked_files(tmp_dir, dvc, run_copy):\n    from dvc.dvcfile import PROJECT_FILE, load_file\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    tmp_dir.gen(\"bar\", \"bar\")\n\n    run_copy(\"foo\", \"foo2\", name=\"copy\")\n    shutil.rmtree(dvc.stage_cache.cache_dir)\n    assert _count_files(dvc.cache.local.path) == 1\n    dvc.gc(workspace=True, force=True)\n    assert _count_files(dvc.cache.local.path) == 1\n\n    # remove pipeline file and lockfile and check\n    load_file(dvc, PROJECT_FILE).remove(force=True)\n    dvc.gc(workspace=True, force=True)\n    assert _count_files(dvc.cache.local.path) == 0\n\n\ndef test_gc_all_experiments(tmp_dir, scm, dvc):\n    from dvc.repo.experiments.refs import ExpRefInfo\n\n    (foo,) = tmp_dir.dvc_gen(\"foo\", \"foo\", commit=\"foo\")\n    foo_hash = foo.outs[0].hash_info.value\n\n    tmp_dir.dvc_gen(\"foo\", \"bar\", commit=\"bar\")\n    baseline = scm.get_rev()\n\n    (baz,) = tmp_dir.dvc_gen(\"foo\", \"baz\", commit=\"baz\")\n    baz_hash = baz.outs[0].hash_info.value\n\n    ref = ExpRefInfo(baseline, \"exp\")\n    scm.set_ref(str(ref), scm.get_rev())\n\n    dvc.gc(all_experiments=True, force=True)\n\n    assert not (\n        tmp_dir / \".dvc\" / \"cache\" / \"files\" / \"md5\" / foo_hash[:2] / foo_hash[2:]\n    ).exists()\n    assert (\n        tmp_dir / \".dvc\" / \"cache\" / \"files\" / \"md5\" / baz_hash[:2] / baz_hash[2:]\n    ).read_text() == \"baz\"\n\n\ndef test_gc_rev_num(tmp_dir, scm, dvc):\n    num = 2\n\n    
hashes = {}\n    for i in range(4):\n        i_str = str(i)\n        f = tmp_dir.dvc_gen(\"foo\", i_str, commit=i_str)\n        hashes[i] = f[0].outs[0].hash_info.value\n\n    dvc.gc(rev=\"HEAD\", num=num, force=True)\n\n    for n, i in enumerate(reversed(range(4))):\n        cache = (\n            tmp_dir / \".dvc\" / \"cache\" / \"files\" / \"md5\" / hashes[i][:2] / hashes[i][2:]\n        )\n        if n >= num:\n            assert not cache.exists()\n        else:\n            assert cache.read_text() == str(i)\n\n\ndef test_date(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"testfile\", \"content\", commit=\"add testfile\")\n\n    now = datetime.datetime.now(tz=datetime.timezone.utc)\n    datestamp = (now.date() + datetime.timedelta(days=1)).isoformat()\n\n    tmp_dir.dvc_gen(\"testfile\", \"modified\", commit=\"modified\")\n\n    dvc.gc(commit_date=datestamp)\n\n    assert _count_files(dvc.cache.local.path) == 1\n    assert dvc.cache.local.exists(\"9ae73c65f418e6f79ceb4f0e4a4b98d5\")  # \"modified\"\n\n    tmp_dir.dvc_gen(\"testfile\", \"modified, again\", commit=\"modify\")\n\n    datestamp = (now.date() - datetime.timedelta(days=1)).isoformat()\n    dvc.gc(commit_date=datestamp)\n    assert _count_files(dvc.cache.local.path) == 2\n    assert dvc.cache.local.exists(\"9ae73c65f418e6f79ceb4f0e4a4b98d5\")\n    assert dvc.cache.local.exists(\n        \"3bcf3b1be3e794a97a5a6b93a005784c\"\n    )  # \"modified, again\"\n\n\ndef test_gc_not_in_remote(tmp_dir, scm, dvc, mocker, local_remote):\n    (standalone, dir1, _) = tmp_dir.dvc_gen(\n        {\n            \"file1\": \"standalone\",\n            \"dir1\": {\"file2\": \"file2\"},\n            \"dir2\": {\"file3\": \"file3\", \"file4\": \"file4\"},\n        }\n    )\n    mocked_remove = mocker.spy(LocalFileSystem, \"remove\")\n    dvc.gc(workspace=True)\n    assert not mocked_remove.call_args_list\n\n    dvc.push([\"file1\", \"dir1\"])\n\n    dvc.gc(workspace=True, not_in_remote=True)\n\n    assert 
len(mocked_remove.mock_calls) == 3\n\n    arg_list = mocked_remove.call_args_list\n\n    standalone_hash = standalone.outs[0].hash_info.value\n    dir1_hash = dir1.outs[0].hash_info.value\n    assert f\"{dir1_hash[2:]}.unpacked\" in arg_list[0][0][1]\n    assert f\"{dir1_hash[2:]}\" in arg_list[1][0][1][0]\n    # We expect 2 calls: standalone_hash and dir1/file2/file2\n    assert len(arg_list[2][0][1]) == 2\n    # Order is not guaranteed here.\n    assert (\n        f\"{standalone_hash[2:]}\" in arg_list[2][0][1][0]\n        or f\"{standalone_hash[2:]}\" in arg_list[2][0][1][1]\n    )\n\n\ndef test_gc_not_in_remote_remote_arg(tmp_dir, scm, dvc, mocker, make_remote):\n    make_remote(\"local_remote\", typ=\"local\")\n    make_remote(\"other_remote\", typ=\"local\", default=False)\n\n    tmp_dir.dvc_gen(\n        {\n            \"file1\": \"standalone\",\n            \"dir1\": {\"file2\": \"file2\"},\n            \"dir2\": {\"file3\": \"file3\", \"file4\": \"file4\"},\n        }\n    )\n    mocked_remove = mocker.spy(LocalFileSystem, \"remove\")\n\n    dvc.push([\"file1\", \"dir1\"], remote=\"other_remote\")\n\n    dvc.gc(workspace=True, not_in_remote=True)\n\n    assert not mocked_remove.mock_calls\n\n    dvc.gc(workspace=True, not_in_remote=True, remote=\"other_remote\")\n\n    assert len(mocked_remove.mock_calls) == 3\n\n\ndef test_gc_not_in_remote_with_remote_field(tmp_dir, scm, dvc, mocker, make_remote):\n    make_remote(\"local_remote\", typ=\"local\")\n    make_remote(\"other_remote\", typ=\"local\", default=False)\n\n    text = textwrap.dedent(\n        \"\"\"\\\n        outs:\n        - path: foo\n          remote: other_remote\n          hash: md5\n    \"\"\"\n    )\n    tmp_dir.gen(\"foo.dvc\", text)\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.push()\n\n    mocked_remove = mocker.spy(LocalFileSystem, \"remove\")\n    dvc.gc(workspace=True, not_in_remote=True)\n    assert len(mocked_remove.mock_calls) == 1\n\n\ndef test_gc_not_in_remote_cloud(tmp_dir, 
scm, dvc):\n    with pytest.raises(\n        InvalidArgumentError,\n        match=\"`--not-in-remote` and `--cloud` are mutually exclusive\",\n    ):\n        dvc.gc(workspace=True, not_in_remote=True, cloud=True)\n\n\ndef test_gc_cloud_remote_field(tmp_dir, scm, dvc, mocker, make_remote):\n    make_remote(\"local_remote\", typ=\"local\")\n    make_remote(\"other_remote\", typ=\"local\", default=False)\n\n    text = textwrap.dedent(\n        \"\"\"\\\n        outs:\n        - path: foo\n          remote: other_remote\n          hash: md5\n    \"\"\"\n    )\n    tmp_dir.gen(\"foo.dvc\", text)\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.push()\n    tmp_dir.dvc_gen(\"foo\", \"bar\")\n\n    mocked_remove = mocker.spy(LocalFileSystem, \"remove\")\n    dvc.gc(workspace=True, cloud=True)\n    assert len(mocked_remove.mock_calls) == 2  # local and other_remote\n\n\ndef test_gc_dry(dvc, good_and_bad_cache):\n    dvc.gc(workspace=True, dry=True)\n    odb = dvc.cache.local\n    good_cache, _ = good_and_bad_cache\n    assert set(odb.all()) != good_cache\n\n\ndef test_gc_logging(caplog, dvc, good_and_bad_cache):\n    with caplog.at_level(logging.INFO, logger=\"dvc\"):\n        dvc.gc(workspace=True)\n\n    # Check that 3 objects were removed from either repo or local cache\n    # (they point to the same ODB, so only one will be logged due to deduplication)\n    has_repo_log = \"Removed 3 objects from repo cache.\" in caplog.text\n    has_local_log = \"Removed 3 objects from local cache.\" in caplog.text\n\n    # Exactly one of repo or local should have the log, not both (deduplication)\n    assert has_repo_log ^ has_local_log, (\n        \"Should have exactly one log for repo/local cache\"\n    )\n\n    assert \"No unused 'legacy' cache to remove.\" in caplog.text\n\n\ndef test_gc_skip_failed(tmp_dir, dvc):\n    with open(\"dvc.yaml\", mode=\"w\") as f:\n        f.write(\"\\ninvalid\")\n\n    with pytest.raises(RevCollectionError):\n        dvc.gc(force=True, 
workspace=True)\n\n    dvc.gc(force=True, workspace=True, skip_failed=True)\n\n\ndef test_gc_dry_logs_paths(caplog, tmp_dir, dvc):\n    \"\"\"Test that dry run logs paths of objects to be removed.\"\"\"\n    odb = dvc.cache.local\n\n    # Add some unused objects directly to cache\n    unused_hashes = [\"test123\", \"test456\", \"test789\"]\n    for hash_val in unused_hashes:\n        odb.add_bytes(hash_val, hash_val.encode(\"utf8\"))\n\n    with caplog.at_level(logging.INFO, logger=\"dvc_data.hashfile.gc\"):\n        dvc.gc(workspace=True, dry=True)\n\n    # Verify that paths are logged by dvc-data layer\n    for hash_val in unused_hashes:\n        expected_path = odb.oid_to_path(hash_val)\n        assert f\"Removing {expected_path}\" in caplog.text\n"
  },
  {
    "path": "tests/func/test_get.py",
    "content": "import errno\nimport logging\nimport os\n\nimport pytest\n\nfrom dvc.cachemgr import CacheManager\nfrom dvc.cli import main\nfrom dvc.exceptions import FileExistsLocallyError\nfrom dvc.fs import system\nfrom dvc.repo import Repo\nfrom dvc.repo.get import GetDVCFileError\nfrom dvc.testing.tmp_dir import make_subrepo\n\n\ndef test_get_repo_file(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"contents\", commit=\"create file\")\n\n    Repo.get(os.fspath(erepo_dir), \"file\", \"file_imported\")\n\n    assert os.path.isfile(\"file_imported\")\n    assert (tmp_dir / \"file_imported\").read_text() == \"contents\"\n\n\ndef test_get_repo_file_no_override(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file1\", \"file1 contents\", commit=\"create file\")\n        erepo_dir.dvc_gen(\"file2\", \"file2 contents\", commit=\"create file2\")\n\n    Repo.get(os.fspath(erepo_dir), \"file1\", \"file_imported\")\n    # getting another file with a name that already exists in Repo.\n    with pytest.raises(FileExistsLocallyError) as exc_info:\n        Repo.get(os.fspath(erepo_dir), \"file2\", \"file_imported\")\n\n    # Make sure it's a functional FileExistsError with errno\n    assert isinstance(exc_info.value, FileExistsError)\n    assert exc_info.value.errno == errno.EEXIST\n\n    assert os.path.isfile(\"file_imported\")\n    assert (tmp_dir / \"file_imported\").read_text() == \"file1 contents\"\n\n\ndef test_get_repo_file_with_override(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file1\", \"file1 contents\", commit=\"create file\")\n        erepo_dir.dvc_gen(\"file2\", \"file2 contents\", commit=\"create file2\")\n\n    Repo.get(os.fspath(erepo_dir), \"file1\", \"file_imported\")\n\n    # override with the 2nd file\n    Repo.get(os.fspath(erepo_dir), \"file2\", \"file_imported\", force=True)\n\n    assert os.path.isfile(\"file_imported\")\n    assert (tmp_dir / 
\"file_imported\").read_text() == \"file2 contents\"\n\n\ndef test_get_repo_dir(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir\": {\"file\": \"contents\"}}, commit=\"create dir\")\n\n    Repo.get(os.fspath(erepo_dir), \"dir\", \"dir_imported\")\n\n    assert (tmp_dir / \"dir_imported\").read_text() == {\"file\": \"contents\"}\n\n\ndef test_get_repo_broken_dir(tmp_dir, erepo_dir):\n    import shutil\n\n    from dvc_data.index import DataIndexDirError\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"broken\": {\"file\": \"contents\"}})\n        erepo_dir.dvc.cache.local.clear()\n        shutil.rmtree(erepo_dir / \"broken\")\n\n    with pytest.raises(DataIndexDirError):\n        Repo.get(os.fspath(erepo_dir), \"broken\", \"out\")\n\n    assert not (tmp_dir / \"out\").exists()\n\n\n@pytest.mark.parametrize(\"erepo_type\", [\"git_dir\", \"erepo_dir\"])\ndef test_get_git_file(request, tmp_dir, erepo_type):\n    src = \"some_file\"\n    dst = \"some_file_imported\"\n\n    erepo = request.getfixturevalue(erepo_type)\n    erepo.scm_gen({src: \"hello\"}, commit=\"add a regular file\")\n\n    Repo.get(os.fspath(erepo), src, dst)\n\n    assert (tmp_dir / dst).read_text() == \"hello\"\n\n\n@pytest.mark.parametrize(\"erepo_type\", [\"git_dir\", \"erepo_dir\"])\ndef test_get_git_dir(request, tmp_dir, erepo_type):\n    src = \"some_directory\"\n    dst = \"some_directory_imported\"\n\n    erepo = request.getfixturevalue(erepo_type)\n    erepo.scm_gen({src: {\"dir\": {\"file.txt\": \"hello\"}}}, commit=\"add a regular dir\")\n\n    Repo.get(os.fspath(erepo), src, dst)\n\n    assert (tmp_dir / dst).read_text() == {\"dir\": {\"file.txt\": \"hello\"}}\n\n\ndef test_cache_type_is_properly_overridden(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        with erepo_dir.dvc.config.edit() as conf:\n            conf[\"cache\"][\"type\"] = \"symlink\"\n        erepo_dir.dvc.cache = CacheManager(erepo_dir.dvc)\n        erepo_dir.scm_add(\n 
           [erepo_dir.dvc.config.files[\"repo\"]], \"set cache type to symlinks\"\n        )\n        erepo_dir.dvc_gen(\"file\", \"contents\", \"create file\")\n    assert system.is_symlink(erepo_dir / \"file\")\n\n    Repo.get(os.fspath(erepo_dir), \"file\", \"file_imported\")\n\n    assert not system.is_symlink(\"file_imported\")\n    assert (tmp_dir / \"file_imported\").read_text() == \"contents\"\n\n\ndef test_get_repo_rev(tmp_dir, erepo_dir):\n    with erepo_dir.chdir(), erepo_dir.branch(\"branch\", new=True):\n        erepo_dir.dvc_gen(\"file\", \"contents\", commit=\"create file on branch\")\n\n    Repo.get(os.fspath(erepo_dir), \"file\", \"file_imported\", rev=\"branch\")\n    assert (tmp_dir / \"file_imported\").read_text() == \"contents\"\n\n\ndef test_get_from_non_dvc_repo(tmp_dir, git_dir):\n    git_dir.scm_gen({\"some_file\": \"contents\"}, commit=\"create file\")\n\n    Repo.get(os.fspath(git_dir), \"some_file\", \"file_imported\")\n    assert (tmp_dir / \"file_imported\").read_text() == \"contents\"\n\n\ndef test_get_a_dvc_file(tmp_dir, erepo_dir):\n    with pytest.raises(GetDVCFileError):\n        Repo.get(os.fspath(erepo_dir), \"some_file.dvc\")\n\n\ndef test_non_cached_output(tmp_dir, erepo_dir):\n    src = \"non_cached_file\"\n    dst = src + \"_imported\"\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc.run(\n            outs_no_cache=[src], cmd=\"echo hello > non_cached_file\", name=\"gen\"\n        )\n        erepo_dir.scm_add([\"dvc.lock\", \"dvc.yaml\"], commit=\"add non-cached output\")\n\n    Repo.get(os.fspath(erepo_dir), src, dst)\n\n    assert (tmp_dir / dst).is_file()\n    # NOTE: using strip() to account for `echo` differences on win and *nix\n    assert (tmp_dir / dst).read_text().strip() == \"hello\"\n\n\n# https://github.com/treeverse/dvc/pull/2837#discussion_r352123053\ndef test_absolute_file_outside_repo(tmp_dir, erepo_dir):\n    with pytest.raises(FileNotFoundError):\n        Repo.get(os.fspath(erepo_dir), 
\"/root/\")\n\n\ndef test_absolute_file_outside_git_repo(tmp_dir, git_dir):\n    with pytest.raises(FileNotFoundError):\n        Repo.get(os.fspath(git_dir), \"/root/\")\n\n\ndef test_unknown_path(tmp_dir, erepo_dir):\n    with pytest.raises(FileNotFoundError):\n        Repo.get(os.fspath(erepo_dir), \"a_non_existing_file\")\n\n\n@pytest.mark.parametrize(\"dname\", [\".\", \"dir\", \"dir/subdir\"])\ndef test_get_to_dir(tmp_dir, erepo_dir, dname):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"contents\", commit=\"create file\")\n\n    os.makedirs(dname, exist_ok=True)\n\n    Repo.get(os.fspath(erepo_dir), \"file\", dname)\n\n    assert (tmp_dir / dname).is_dir()\n    assert (tmp_dir / dname / \"file\").read_text() == \"contents\"\n\n\ndef test_get_from_non_dvc_master(tmp_dir, git_dir):\n    with git_dir.chdir(), git_dir.branch(\"branch\", new=True):\n        git_dir.init(dvc=True)\n        git_dir.dvc_gen(\"some_file\", \"some text\", commit=\"create some file\")\n\n    Repo.get(os.fspath(git_dir), \"some_file\", out=\"some_dst\", rev=\"branch\")\n\n    assert (tmp_dir / \"some_dst\").read_text() == \"some text\"\n\n\ndef test_get_file_from_dir(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\n            {\n                \"dir\": {\n                    \"1\": \"1\",\n                    \"2\": \"2\",\n                    \"subdir\": {\"foo\": \"foo\", \"bar\": \"bar\"},\n                }\n            },\n            commit=\"create dir\",\n        )\n\n    Repo.get(os.fspath(erepo_dir), os.path.join(\"dir\", \"1\"))\n    assert (tmp_dir / \"1\").read_text() == \"1\"\n\n    Repo.get(os.fspath(erepo_dir), os.path.join(\"dir\", \"2\"), out=\"file\")\n    assert (tmp_dir / \"file\").read_text() == \"2\"\n\n    Repo.get(os.fspath(erepo_dir), os.path.join(\"dir\", \"subdir\"))\n    assert (tmp_dir / \"subdir\" / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"subdir\" / \"bar\").read_text() == \"bar\"\n\n    
Repo.get(os.fspath(erepo_dir), os.path.join(\"dir\", \"subdir\", \"foo\"), out=\"X\")\n    assert (tmp_dir / \"X\").read_text() == \"foo\"\n\n\ndef test_get_url_positive(tmp_dir, erepo_dir, caplog, local_cloud):\n    erepo_dir.add_remote(config=local_cloud.config)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo\")\n    erepo_dir.dvc.push()\n\n    caplog.clear()\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert main([\"get\", os.fspath(erepo_dir), \"foo\", \"--show-url\"]) == 0\n        assert not caplog.text\n\n\ndef test_get_url_not_existing(tmp_dir, erepo_dir, caplog):\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert (\n            main([\"get\", os.fspath(erepo_dir), \"not-existing-file\", \"--show-url\"]) != 0\n        )\n\n\ndef test_get_url_git_only_repo(tmp_dir, scm, caplog):\n    tmp_dir.scm_gen({\"foo\": \"foo\"}, commit=\"initial\")\n\n    with caplog.at_level(logging.ERROR):\n        assert main([\"get\", os.fspath(tmp_dir), \"foo\", \"--show-url\"]) != 0\n\n\ndef test_get_pipeline_tracked_outs(tmp_dir, dvc, scm, git_dir, run_copy, local_remote):\n    from dvc.dvcfile import LOCK_FILE, PROJECT_FILE\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    dvc.push()\n\n    dvc.scm.add([PROJECT_FILE, LOCK_FILE])\n    dvc.scm.commit(\"add pipeline stage\")\n\n    with git_dir.chdir():\n        Repo.get(f\"file://{tmp_dir.as_posix()}\", \"bar\", out=\"baz\")\n        assert (git_dir / \"baz\").read_text() == \"foo\"\n\n\ndef test_get_mixed_dir(tmp_dir, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(os.path.join(\"dir\", \"foo\"), \"foo\", commit=\"foo\")\n        erepo_dir.scm_gen(os.path.join(\"dir\", \"bar\"), \"bar\", commit=\"bar\")\n\n    Repo.get(os.fspath(erepo_dir), \"dir\")\n    assert (tmp_dir / \"dir\").read_text() == {\n        \".gitignore\": \"/foo\\n\",\n        \"foo\": \"foo\",\n        \"bar\": \"bar\",\n    
}\n\n\n@pytest.mark.parametrize(\"is_dvc\", [True, False])\n@pytest.mark.parametrize(\"files\", [{\"foo\": \"foo\"}, {\"dir\": {\"bar\": \"bar\"}}])\ndef test_get_from_subrepos(tmp_dir, erepo_dir, is_dvc, files):\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    gen = subrepo.dvc_gen if is_dvc else subrepo.scm_gen\n    with subrepo.chdir():\n        gen(files, commit=\"add files in subrepo\")\n\n    key = next(iter(files))\n    Repo.get(os.fspath(erepo_dir), f\"subrepo/{key}\", out=\"out\")\n\n    assert (tmp_dir / \"out\").read_text() == files[key]\n\n\ndef test_granular_get_from_subrepos(tmp_dir, erepo_dir):\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"dir\": {\"bar\": \"bar\"}}, commit=\"files in subrepo\")\n\n    path = os.path.join(\"subrepo\", \"dir\", \"bar\")\n    Repo.get(os.fspath(erepo_dir), path, out=\"out\")\n    assert (tmp_dir / \"out\").read_text() == \"bar\"\n\n\ndef test_get_complete_repo(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"add foo\")\n\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"dir\": {\"bar\": \"bar\"}}, commit=\"files in subrepo\")\n\n    Repo.get(os.fspath(erepo_dir), \"subrepo\", out=\"out_sub\")\n    assert (tmp_dir / \"out_sub\").read_text() == {\n        \".gitignore\": \"/dir\\n\",\n        \"dir\": {\"bar\": \"bar\"},\n    }\n\n    Repo.get(os.fspath(erepo_dir), \".\", out=\"out\")\n    assert (tmp_dir / \"out\").read_text() == {\".gitignore\": \"/foo\\n\", \"foo\": \"foo\"}\n"
  },
  {
    "path": "tests/func/test_get_url.py",
    "content": "import errno\nimport os\n\nimport pytest\n\nfrom dvc.config import Config\nfrom dvc.exceptions import FileExistsLocallyError, URLMissingError\nfrom dvc.repo import Repo\nfrom dvc.testing.workspace_tests import TestGetUrl as _TestGetUrl\n\n\ndef test_get_file(tmp_dir):\n    tmp_dir.gen({\"foo\": \"foo contents\"})\n\n    Repo.get_url(\"foo\", \"foo_imported\")\n\n    assert (tmp_dir / \"foo_imported\").is_file()\n    assert (tmp_dir / \"foo_imported\").read_text() == \"foo contents\"\n\n\ndef test_get_file_conflict_and_override(tmp_dir):\n    tmp_dir.gen({\"foo\": \"foo contents\"})\n    tmp_dir.gen({\"bar\": \"bar contents\"})\n\n    with pytest.raises(FileExistsLocallyError) as exc_info:\n        Repo.get_url(\"foo\", \"bar\")\n\n    # verify no override\n    assert (tmp_dir / \"bar\").is_file()\n    assert (tmp_dir / \"bar\").read_text() == \"bar contents\"\n\n    # verify meaningful/BC exception type/errno\n    assert isinstance(exc_info.value, FileExistsError)\n    assert exc_info.value.errno == errno.EEXIST\n\n    # now, override\n    Repo.get_url(\"foo\", \"bar\", force=True)\n\n    assert (tmp_dir / \"bar\").is_file()\n    assert (tmp_dir / \"bar\").read_text() == \"foo contents\"\n\n\ndef test_get_dir(tmp_dir):\n    tmp_dir.gen({\"foo\": {\"foo\": \"foo contents\"}})\n\n    Repo.get_url(\"foo\", \"foo_imported\")\n\n    assert (tmp_dir / \"foo_imported\").is_dir()\n    assert (tmp_dir / \"foo_imported\" / \"foo\").is_file()\n    assert (tmp_dir / \"foo_imported\" / \"foo\").read_text() == \"foo contents\"\n\n\n@pytest.mark.parametrize(\"dname\", [\".\", \"dir\", \"dir/subdir\"])\ndef test_get_url_to_dir(tmp_dir, dname):\n    tmp_dir.gen({\"src\": {\"foo\": \"foo contents\"}, \"dir\": {\"subdir\": {}}})\n\n    Repo.get_url(os.path.join(\"src\", \"foo\"), dname)\n\n    assert (tmp_dir / dname).is_dir()\n    assert (tmp_dir / dname / \"foo\").read_text() == \"foo contents\"\n\n\ndef test_get_url_nonexistent(tmp_dir):\n    with 
pytest.raises(URLMissingError):\n        Repo.get_url(\"nonexistent\")\n\n\nclass TestGetUrl(_TestGetUrl):\n    pass\n\n\ndef test_get_url_config(tmp_dir, dvc, make_remote):\n    remote_path = make_remote(\"myremote\", default=False, typ=\"local\")\n    (remote_path / \"foo\").write_text(\"foo\")\n    (remote_path / \"bar\").write_text(\"bar\")\n\n    Repo.get_url(\"remote://myremote/foo\", config=Config.from_cwd())\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n"
  },
  {
    "path": "tests/func/test_ignore.py",
    "content": "import os\nimport shutil\nfrom pathlib import Path\n\nimport pytest\n\nfrom dvc.ignore import DvcIgnore, DvcIgnorePatterns\nfrom dvc.output import OutputIsIgnoredError\nfrom dvc.pathspec_math import PatternInfo, merge_patterns\nfrom dvc.repo import Repo\nfrom dvc.testing.tmp_dir import TmpDir\nfrom dvc_data.hashfile.build import IgnoreInCollectedDirError\nfrom dvc_data.hashfile.utils import get_mtime_and_size\n\n\ndef _to_pattern_info_list(str_list: list):\n    return [PatternInfo(a, \"\") for a in str_list]\n\n\n@pytest.mark.parametrize(\"filename\", [\"ignored\", \"тест\"])\ndef test_ignore(tmp_dir, dvc, filename):\n    tmp_dir.gen({\"dir\": {filename: filename, \"other\": \"text2\"}})\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, f\"dir/{filename}\")\n\n    dvc._reset()\n\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir)\n    assert set(result) == {\n        (tmp_dir / DvcIgnore.DVCIGNORE_FILE).fs_path,\n        (tmp_dir / \"dir\" / \"other\").fs_path,\n    }\n\n\ndef test_walk(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"foo\": \"foo\",\n            \"bar\": \"bar\",\n            \"dir\": {\n                \"foo\": \"foo\",\n                \"bar\": \"bar\",\n                \"baz\": \"baz\",\n                \"subdir\": {\"foo\": \"foo\", \"qux\": \"qux\"},\n            },\n        }\n    )\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"dir/bar\\nfoo\")\n\n    dvc._reset()\n\n    result = list(dvc.dvcignore.walk(dvc.fs, tmp_dir))\n    assert result[0][0] == str(tmp_dir)\n    assert result[0][1] == [\"dir\"]\n    assert set(result[0][2]) == {\"bar\", \".dvcignore\"}\n    assert result[1][0] == str(tmp_dir / \"dir\")\n    assert result[1][1] == [\"subdir\"]\n    assert result[1][2] == [\"baz\"]\n    assert result[2][0] == str(tmp_dir / \"dir\" / \"subdir\")\n    assert result[2][1] == []\n    assert result[2][2] == [\"qux\"]\n\n    result = list(dvc.dvcignore.walk(dvc.fs, tmp_dir, detail=True))\n    assert result == [\n        (\n       
     str(tmp_dir),\n            {\"dir\": dvc.fs.info(str(tmp_dir / \"dir\"))},\n            {\n                \"bar\": dvc.fs.info(str(tmp_dir / \"bar\")),\n                \".dvcignore\": dvc.fs.info(str(tmp_dir / \".dvcignore\")),\n            },\n        ),\n        (\n            str(tmp_dir / \"dir\"),\n            {\n                \"subdir\": dvc.fs.info(str(tmp_dir / \"dir\" / \"subdir\")),\n            },\n            {\n                \"baz\": dvc.fs.info(str(tmp_dir / \"dir\" / \"baz\")),\n            },\n        ),\n        (\n            str(tmp_dir / \"dir\" / \"subdir\"),\n            {},\n            {\"qux\": dvc.fs.info(str(tmp_dir / \"dir\" / \"subdir\" / \"qux\"))},\n        ),\n    ]\n\n\ndef test_rename_ignored_file(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"ignored\": \"...\", \"other\": \"text\"}})\n\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"ignored*\")\n    dvc._reset()\n\n    mtime, size = get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore)\n\n    shutil.move(\"dir/ignored\", \"dir/ignored_new\")\n    new_mtime, new_size = get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore)\n\n    assert new_mtime == mtime\n    assert new_size == size\n\n\ndef test_rename_file(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    mtime, size = get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore)\n\n    shutil.move(\"dir/foo\", \"dir/foo_new\")\n    new_mtime, new_size = get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore)\n\n    assert new_mtime != mtime\n    assert new_size == size\n\n\ndef test_remove_ignored_file(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"ignored\": \"...\", \"other\": \"text\"}})\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"dir/ignored\")\n    dvc._reset()\n\n    mtime, size = get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore)\n\n    os.remove(\"dir/ignored\")\n    assert get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore) == (mtime, size)\n\n\ndef test_remove_file(tmp_dir, dvc):\n    
tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    mtime, size = get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore)\n\n    os.remove(\"dir/foo\")\n    new_mtime, new_size = get_mtime_and_size(\"dir\", dvc.fs, dvc.dvcignore)\n\n    assert new_mtime != mtime\n    assert new_size != size\n\n\ndef test_dvcignore_in_out_dir(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", DvcIgnore.DVCIGNORE_FILE: \"\"}})\n\n    with pytest.raises(IgnoreInCollectedDirError):\n        dvc.add(\"dir\")\n\n\n@pytest.mark.parametrize(\"dname\", [\"dir\", \"dir/subdir\"])\ndef test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):\n    tmp_dir.gen({\"dir\": {\"subdir\": {}}})\n\n    top_ignore_file = (tmp_dir / dname).with_name(DvcIgnore.DVCIGNORE_FILE)\n    top_ignore_file.write_text(os.path.basename(dname))\n    dvc._reset()\n\n    ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE\n    ignore_file.write_text(\"foo\")\n\n    dvcignore = dvc.dvcignore\n\n    top_ignore_path = os.path.dirname(os.fspath(top_ignore_file))\n\n    sub_dir_path = os.path.dirname(os.fspath(ignore_file))\n\n    assert (\n        DvcIgnorePatterns(\n            *merge_patterns(\n                os.path,\n                _to_pattern_info_list([\".hg/\", \".git/\", \".git\", \".dvc/\"]),\n                os.fspath(tmp_dir),\n                _to_pattern_info_list([os.path.basename(dname)]),\n                top_ignore_path,\n            ),\n            os.sep,\n        )\n        == dvcignore._get_trie_pattern(top_ignore_path)\n        == dvcignore._get_trie_pattern(sub_dir_path)\n    )\n\n\ndef test_ignore_on_branch(tmp_dir, scm, dvc):\n    from dvc.fs import GitFileSystem\n\n    tmp_dir.scm_gen({\"foo\": \"foo\", \"bar\": \"bar\"}, commit=\"add files\")\n\n    with tmp_dir.branch(\"branch\", new=True):\n        tmp_dir.scm_gen(DvcIgnore.DVCIGNORE_FILE, \"foo\", commit=\"add ignore\")\n\n    dvc._reset()\n\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir)\n    assert set(result) == 
{\n        (tmp_dir / \"foo\").fs_path,\n        (tmp_dir / \"bar\").fs_path,\n        (tmp_dir / DvcIgnore.DVCIGNORE_FILE).fs_path,\n    }\n\n    dvc.fs = GitFileSystem(scm=scm, rev=\"branch\")\n    dvc.root_dir = \"/\"\n    assert dvc.dvcignore.is_ignored_file(\"/foo\")\n\n\ndef test_match_nested(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \".dvcignore\": \"*.backup\\ntmp\",\n            \"foo\": \"foo\",\n            \"tmp\": \"...\",\n            \"dir\": {\"x.backup\": \"x backup\", \"tmp\": \"content\"},\n        }\n    )\n    dvc._reset()\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir)\n    assert set(result) == {\n        (tmp_dir / DvcIgnore.DVCIGNORE_FILE).fs_path,\n        (tmp_dir / \"foo\").fs_path,\n    }\n\n\ndef test_ignore_external(tmp_dir, scm, dvc, tmp_path_factory):\n    tmp_dir.gen(\".dvcignore\", \"*.backup\\ntmp\")\n    ext_dir = TmpDir(os.fspath(tmp_path_factory.mktemp(\"external_dir\")))\n    ext_dir.gen({\"y.backup\": \"y\", \"tmp\": {\"file\": \"ext tmp\"}})\n\n    result = dvc.dvcignore.find(dvc.fs, ext_dir)\n    assert set(result) == {\n        (ext_dir / \"y.backup\").fs_path,\n        (ext_dir / \"tmp\" / \"file\").fs_path,\n    }\n    assert dvc.dvcignore.is_ignored_dir(os.fspath(ext_dir / \"tmp\")) is False\n    assert dvc.dvcignore.is_ignored_file(os.fspath(ext_dir / \"y.backup\")) is False\n\n\ndef test_ignore_resurface_subrepo(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"add foo\")\n    subrepo_dir = tmp_dir / \"subdir\"\n    subrepo_dir.mkdir()\n    with subrepo_dir.chdir():\n        Repo.init(subdir=True)\n        subrepo_dir.gen({\"bar\": {\"bar\": \"bar\"}})\n\n    dvc._reset()\n\n    files = [\"foo\"]\n    dirs = [\"bar\"]\n    root = os.fspath(subrepo_dir)\n    assert dvc.dvcignore(root, dirs, files, ignore_subrepos=False) == (dirs, files)\n    assert dvc.dvcignore(root, dirs, files) == ([], [])\n\n    assert dvc.dvcignore.is_ignored_dir(os.fspath(subrepo_dir / \"bar\"))\n    assert 
not dvc.dvcignore.is_ignored_dir(\n        os.fspath(subrepo_dir / \"bar\"), ignore_subrepos=False\n    )\n\n\ndef test_ignore_blank_line(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"ignored\": \"text\", \"other\": \"text2\"}})\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"foo\\n\\ndir/ignored\")\n    dvc._reset()\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir / \"dir\")\n    assert set(result) == {(tmp_dir / \"dir\" / \"other\").fs_path}\n\n\n# It is not possible to re-include a file if a parent directory of\n# that file is excluded.\n# Git doesn't list excluded directories for performance reasons,\n# so any patterns on contained files have no effect,\n# no matter where they are defined.\n@pytest.mark.parametrize(\n    \"data_struct, pattern_list, result_set\",\n    [\n        (\n            {\"dir\": {\"subdir\": {\"not_ignore\": \"121\"}}},\n            [\"subdir/*\", \"!not_ignore\"],\n            {os.path.join(\"dir\", \"subdir\", \"not_ignore\")},\n        ),\n        (\n            {\"dir\": {\"subdir\": {\"should_ignore\": \"121\"}}},\n            [\"subdir\", \"!should_ignore\"],\n            set(),\n        ),\n        (\n            {\"dir\": {\"subdir\": {\"should_ignore\": \"121\"}}},\n            [\"subdir/\", \"!should_ignore\"],\n            set(),\n        ),\n    ],\n)\ndef test_ignore_file_in_parent_path(\n    tmp_dir, dvc, data_struct, pattern_list, result_set\n):\n    tmp_dir.gen(data_struct)\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"\\n\".join(pattern_list))\n    dvc._reset()\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir / \"dir\")\n    assert set(result) == {(tmp_dir / relpath).fs_path for relpath in result_set}\n\n\n# If there is a separator at the end of the pattern then the pattern\n# will only match directories,\n# otherwise the pattern can match both files and directories.\n# For example, a pattern doc/frotz/ matches doc/frotz directory,\n# but not a/doc/frotz directory;\ndef test_ignore_sub_directory(tmp_dir, dvc):\n    
tmp_dir.gen(\n        {\n            \"dir\": {\n                \"doc\": {\"fortz\": {\"b\": \"b\"}},\n                \"a\": {\"doc\": {\"fortz\": {\"a\": \"a\"}}},\n            }\n        }\n    )\n    tmp_dir.gen({\"dir\": {DvcIgnore.DVCIGNORE_FILE: \"doc/fortz\"}})\n\n    dvc._reset()\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir / \"dir\")\n    assert set(result) == {\n        (tmp_dir / \"dir\" / \"a\" / \"doc\" / \"fortz\" / \"a\").fs_path,\n        (tmp_dir / \"dir\" / DvcIgnore.DVCIGNORE_FILE).fs_path,\n    }\n\n\n# however frotz/ matches frotz and a/frotz that is a directory\ndef test_ignore_directory(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"fortz\": {}, \"a\": {\"fortz\": {}}}})\n    tmp_dir.gen({\"dir\": {DvcIgnore.DVCIGNORE_FILE: \"fortz\"}})\n    dvc._reset()\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir / \"dir\")\n    assert set(result) == {(tmp_dir / \"dir\" / DvcIgnore.DVCIGNORE_FILE).fs_path}\n\n\ndef test_multi_ignore_file(tmp_dir, dvc, monkeypatch):\n    tmp_dir.gen({\"dir\": {\"subdir\": {\"should_ignore\": \"1\", \"not_ignore\": \"1\"}}})\n    tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, \"dir/subdir/*_ignore\")\n    tmp_dir.gen({\"dir\": {DvcIgnore.DVCIGNORE_FILE: \"!subdir/not_ignore\"}})\n    dvc._reset()\n    result = dvc.dvcignore.find(dvc.fs, tmp_dir / \"dir\")\n    assert set(result) == {\n        (tmp_dir / \"dir\" / \"subdir\" / \"not_ignore\").fs_path,\n        (tmp_dir / \"dir\" / DvcIgnore.DVCIGNORE_FILE).fs_path,\n    }\n\n\ndef test_pattern_trie_fs(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"top\": {\n                \"first\": {\n                    DvcIgnore.DVCIGNORE_FILE: \"a\\nb\\nc\",\n                    \"middle\": {\n                        \"second\": {\n                            DvcIgnore.DVCIGNORE_FILE: \"d\\ne\\nf\",\n                            \"bottom\": {},\n                        }\n                    },\n                }\n            },\n            \"other\": {DvcIgnore.DVCIGNORE_FILE: 
\"1\\n2\\n3\"},\n        }\n    )\n    dvc._reset()\n    dvcignore = dvc.dvcignore\n\n    ignore_pattern_top = dvcignore._get_trie_pattern(os.fspath(tmp_dir / \"top\"))\n    ignore_pattern_other = dvcignore._get_trie_pattern(os.fspath(tmp_dir / \"other\"))\n    ignore_pattern_first = dvcignore._get_trie_pattern(\n        os.fspath(tmp_dir / \"top\" / \"first\")\n    )\n    ignore_pattern_middle = dvcignore._get_trie_pattern(\n        os.fspath(tmp_dir / \"top\" / \"first\" / \"middle\")\n    )\n    ignore_pattern_second = dvcignore._get_trie_pattern(\n        os.fspath(tmp_dir / \"top\" / \"first\" / \"middle\" / \"second\")\n    )\n    ignore_pattern_bottom = dvcignore._get_trie_pattern(\n        os.fspath(tmp_dir / \"top\" / \"first\" / \"middle\" / \"second\" / \"bottom\")\n    )\n\n    base_pattern = (\n        _to_pattern_info_list([\".hg/\", \".git/\", \".git\", \".dvc/\"]),\n        os.fspath(tmp_dir),\n    )\n    first_pattern = merge_patterns(\n        os.path,\n        *base_pattern,\n        _to_pattern_info_list([\"a\", \"b\", \"c\"]),\n        os.fspath(tmp_dir / \"top\" / \"first\"),\n    )\n    second_pattern = merge_patterns(\n        os.path,\n        *first_pattern,\n        _to_pattern_info_list([\"d\", \"e\", \"f\"]),\n        os.fspath(tmp_dir / \"top\" / \"first\" / \"middle\" / \"second\"),\n    )\n    other_pattern = merge_patterns(\n        os.path,\n        *base_pattern,\n        _to_pattern_info_list([\"1\", \"2\", \"3\"]),\n        os.fspath(tmp_dir / \"other\"),\n    )\n\n    assert DvcIgnorePatterns(*base_pattern, os.sep) == ignore_pattern_top\n    assert DvcIgnorePatterns(*other_pattern, os.sep) == ignore_pattern_other\n    assert (\n        DvcIgnorePatterns(*first_pattern, os.sep)\n        == ignore_pattern_first\n        == ignore_pattern_middle\n    )\n    assert (\n        DvcIgnorePatterns(*second_pattern, os.sep)\n        == ignore_pattern_second\n        == ignore_pattern_bottom\n    )\n\n\ndef 
test_ignore_in_added_dir(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"sub\": {\n                    \"ignored\": {\"content\": \"ignored content\"},\n                    \"not_ignored\": \"not ignored content\",\n                }\n            },\n            \".dvcignore\": \"**/ignored\",\n        }\n    )\n    dvc._reset()\n\n    ignored_path = tmp_dir / \"dir\" / \"sub\" / \"ignored\"\n    result = dvc.dvcignore.find(dvc.fs, ignored_path)\n    assert set(result) == set()\n    assert ignored_path.exists()\n\n    dvc.add(\"dir\")\n    shutil.rmtree(ignored_path)\n    dvc.checkout()\n\n    assert not ignored_path.exists()\n\n\ndef test_ignored_output(tmp_dir, scm, dvc, run_copy):\n    tmp_dir.gen({\".dvcignore\": \"*.log\\n!foo.log\", \"foo\": \"foo content\"})\n\n    with pytest.raises(OutputIsIgnoredError):\n        run_copy(\"foo\", \"abc.log\", name=\"copy\")\n\n    run_copy(\"foo\", \"foo.log\", name=\"copy\")\n\n\ndef test_ignored_output_nested(tmp_dir, scm, dvc, run_copy):\n    tmp_dir.gen({\".dvcignore\": \"/*.log\", \"copy\": {\"foo\": \"foo content\"}})\n\n    run_copy(\"foo\", \"foo.log\", name=\"copy\", wdir=\"copy\")\n\n    assert Path(\"copy/foo.log\").exists()\n\n\ndef test_run_dvcignored_dep(tmp_dir, dvc, run_copy):\n    tmp_dir.gen({\".dvcignore\": \"dir\\n\", \"dir\": {\"foo\": \"foo\"}})\n    run_copy(os.path.join(\"dir\", \"foo\"), \"bar\", name=\"copy-foo-to-bar\")\n    assert (tmp_dir / \"bar\").read_text() == \"foo\"\n\n\ndef test_pull_ignore(tmp_dir, dvc, local_cloud):\n    tmp_dir.dvc_gen(\n        {\n            \".dvcignore\": \"data/processed/\",\n            \"data\": {\"foo\": \"foo\", \"processed\": {\"bar\": \"bar\"}},\n        }\n    )\n    tmp_dir.add_remote(config=local_cloud.config)\n    dvc.add(\"data\")\n    dvc.push()\n\n    foo_path = tmp_dir / \"data\" / \"foo\"\n    foo_path.unlink()\n    assert not foo_path.exists()\n\n    dvc.cache.local.clear()\n    dvc.pull()\n\n    assert 
foo_path.exists()\n    assert foo_path.read_text() == \"foo\"\n"
  },
  {
    "path": "tests/func/test_import.py",
    "content": "import filecmp\nimport os\n\nimport pytest\nfrom dulwich.porcelain import push as git_push\nfrom dulwich.porcelain import remote_add as git_remote_add\nfrom funcy import first\n\nfrom dvc.cachemgr import CacheManager\nfrom dvc.config import NoRemoteError\nfrom dvc.dvcfile import load_file\nfrom dvc.fs import system\nfrom dvc.scm import Git\nfrom dvc.stage.exceptions import StagePathNotFoundError\nfrom dvc.testing.tmp_dir import make_subrepo\nfrom dvc.utils.fs import remove\nfrom dvc_data.hashfile import hash as _hash\nfrom dvc_data.index.index import DataIndex, DataIndexDirError\n\n\ndef test_import(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo\")\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\")\n\n    assert os.path.isfile(\"foo_imported\")\n    assert (tmp_dir / \"foo_imported\").read_text() == \"foo content\"\n    assert scm.is_ignored(\"foo_imported\")\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n    assert stage.deps[0].fs.repo.cache.local.path == dvc.cache.local.path\n\n\n@pytest.mark.parametrize(\"src_is_dvc\", [True, False])\ndef test_import_git_file(tmp_dir, scm, dvc, git_dir, src_is_dvc):\n    if src_is_dvc:\n        git_dir.init(dvc=True)\n\n    git_dir.scm_gen(\"src\", \"hello\", commit=\"add a git file\")\n\n    stage = tmp_dir.dvc.imp(os.fspath(git_dir), \"src\", \"dst\")\n\n    assert (tmp_dir / \"dst\").read_text() == \"hello\"\n    assert tmp_dir.scm.is_ignored(os.fspath(tmp_dir / \"dst\"))\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(git_dir),\n        \"rev_lock\": git_dir.scm.get_rev(),\n    }\n\n\ndef test_import_cached_file(mocker, erepo_dir, tmp_dir, dvc, scm, monkeypatch):\n    src = \"some_file\"\n    dst = \"some_file_imported\"\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({src: \"hello\"}, 
commit=\"add a regular file\")\n\n    tmp_dir.dvc_gen({dst: \"hello\"})\n    (tmp_dir / dst).unlink()\n\n    remote_exception = NoRemoteError(\"dvc import\")\n    mocker.patch.object(dvc.cloud, \"get_remote_odb\", side_effect=remote_exception)\n    tmp_dir.dvc.imp(os.fspath(erepo_dir), src, dst)\n\n    assert (tmp_dir / dst).is_file()\n    assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False)\n\n\n@pytest.mark.parametrize(\"src_is_dvc\", [True, False])\ndef test_import_git_dir(tmp_dir, scm, dvc, git_dir, src_is_dvc):\n    if src_is_dvc:\n        git_dir.init(dvc=True)\n\n    git_dir.scm_gen({\"src\": {\"file.txt\": \"hello\"}}, commit=\"add a dir\")\n\n    stage = dvc.imp(os.fspath(git_dir), \"src\", \"dst\")\n\n    assert (tmp_dir / \"dst\").read_text() == {\"file.txt\": \"hello\"}\n    assert tmp_dir.scm.is_ignored(os.fspath(tmp_dir / \"dst\"))\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(git_dir),\n        \"rev_lock\": git_dir.scm.get_rev(),\n    }\n\n\ndef test_import_dir(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir\": {\"foo\": \"foo content\"}}, commit=\"create dir\")\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"dir\", \"dir_imported\")\n\n    assert (tmp_dir / \"dir_imported\").read_text() == {\"foo\": \"foo content\"}\n    assert scm.is_ignored(\"dir_imported\")\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n\n\ndef test_import_broken_dir(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir\": {\"foo\": \"foo content\"}}, commit=\"create dir\")\n        erepo_dir.dvc.cache.local.clear()\n        remove(erepo_dir / \"dir\")\n\n    with pytest.raises(DataIndexDirError):\n        dvc.imp(os.fspath(erepo_dir), \"dir\", \"dir_imported\")\n\n    assert not (tmp_dir / \"dir_imported\").exists()\n    assert not (tmp_dir / 
\"dir_imported.dvc\").exists()\n\n\ndef test_import_file_from_dir(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\n            {\n                \"dir\": {\n                    \"1\": \"1\",\n                    \"2\": \"2\",\n                    \"subdir\": {\"foo\": \"foo\", \"bar\": \"bar\"},\n                }\n            },\n            commit=\"create dir\",\n        )\n\n    stage = dvc.imp(os.fspath(erepo_dir), os.path.join(\"dir\", \"1\"))\n\n    assert (tmp_dir / \"1\").read_text() == \"1\"\n    assert scm.is_ignored(\"1\")\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n\n    dvc.imp(os.fspath(erepo_dir), os.path.join(\"dir\", \"2\"), out=\"file\")\n    assert (tmp_dir / \"file\").read_text() == \"2\"\n    assert (tmp_dir / \"file.dvc\").exists()\n\n    dvc.imp(os.fspath(erepo_dir), os.path.join(\"dir\", \"subdir\"))\n    assert (tmp_dir / \"subdir\" / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"subdir\" / \"bar\").read_text() == \"bar\"\n    assert (tmp_dir / \"subdir.dvc\").exists()\n\n    dvc.imp(os.fspath(erepo_dir), os.path.join(\"dir\", \"subdir\", \"foo\"), out=\"X\")\n    assert (tmp_dir / \"X\").read_text() == \"foo\"\n    assert (tmp_dir / \"X.dvc\").exists()\n\n\ndef test_import_file_from_dir_to_dir(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir\": {\"foo\": \"foo\"}}, commit=\"create dir\")\n\n    with pytest.raises(StagePathNotFoundError):\n        dvc.imp(\n            os.fspath(erepo_dir),\n            os.path.join(\"dir\", \"foo\"),\n            out=os.path.join(\"dir\", \"foo\"),\n        )\n\n    tmp_dir.gen({\"dir\": {}})\n    dvc.imp(\n        os.fspath(erepo_dir),\n        os.path.join(\"dir\", \"foo\"),\n        out=os.path.join(\"dir\", \"foo\"),\n    )\n    assert not (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / \"dir\" / 
\"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"dir\" / \"foo.dvc\").exists()\n\n\ndef test_import_non_cached(erepo_dir, tmp_dir, dvc, scm):\n    src = \"non_cached_output\"\n    dst = src + \"_imported\"\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc.run(\n            cmd=f\"echo hello > {src}\",\n            outs_no_cache=[src],\n            name=\"gen\",\n        )\n\n    erepo_dir.scm_add([os.fspath(erepo_dir / src)], commit=\"add a non-cached out\")\n\n    stage = tmp_dir.dvc.imp(os.fspath(erepo_dir), src, dst)\n\n    assert (tmp_dir / dst).is_file()\n    assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False)\n    assert tmp_dir.scm.is_ignored(dst)\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n\n\ndef test_import_rev(tmp_dir, scm, dvc, erepo_dir):\n    rev = None\n    with erepo_dir.chdir(), erepo_dir.branch(\"branch\", new=True):\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo on branch\")\n        rev = erepo_dir.scm.get_rev()\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\", rev=\"branch\")\n\n    assert (tmp_dir / \"foo_imported\").read_text() == \"foo content\"\n    assert scm.is_ignored(\"foo_imported\")\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev\": \"branch\",\n        \"rev_lock\": rev,\n    }\n\n\ndef test_pull_imported_stage(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo\")\n    dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\")\n\n    dst_stage = load_file(dvc, \"foo_imported.dvc\").stage\n    dst_cache = dst_stage.outs[0].cache_path\n\n    remove(\"foo_imported\")\n    remove(dst_cache)\n    dvc.pull([\"foo_imported.dvc\"])\n\n    assert os.path.isfile(\"foo_imported\")\n    assert os.path.isfile(dst_cache)\n\n\ndef test_import_no_download(tmp_dir, 
scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo\")\n\n    dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\", no_download=True)\n\n    assert not os.path.exists(\"foo_imported\")\n\n    dst_stage = load_file(dvc, \"foo_imported.dvc\").stage\n\n    assert dst_stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n    assert scm.is_ignored(\"foo_imported\")\n\n\ndef test_pull_import_no_download(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(os.path.join(\"foo\", \"bar\"), b\"bar\", commit=\"add bar\")\n        erepo_dir.dvc_gen(os.path.join(\"foo\", \"baz\"), b\"baz contents\", commit=\"add baz\")\n        size = (\n            len(b\"bar\")\n            + len(b\"baz contents\")\n            + len((erepo_dir / \"foo\" / \".gitignore\").read_bytes())\n        )\n\n    dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\", no_download=True)\n\n    dvc.pull([\"foo_imported.dvc\"])\n    assert (tmp_dir / \"foo_imported\").exists()\n    assert (tmp_dir / \"foo_imported\" / \"bar\").read_bytes() == b\"bar\"\n    assert (tmp_dir / \"foo_imported\" / \"baz\").read_bytes() == b\"baz contents\"\n\n    dvc.commit(force=True)\n\n    stage = load_file(dvc, \"foo_imported.dvc\").stage\n    if os.name == \"nt\":\n        expected_hash = \"2e798234df5f782340ac3ce046f8dfae.dir\"\n    else:\n        expected_hash = \"bdb8641831d8fcb03939637e09011c21.dir\"\n    assert stage.outs[0].hash_info.value == expected_hash\n\n    assert stage.outs[0].meta.size == size\n    assert stage.outs[0].meta.nfiles == 3\n    assert stage.outs[0].meta.isdir\n\n\ndef test_pull_import_no_download_rev_lock(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"add\")\n\n    dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\", no_download=True)\n\n    with 
erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"modified foo content\", commit=\"modify foo\")\n\n    dvc.pull([\"foo_imported.dvc\"])\n    assert (tmp_dir / \"foo_imported\").read_text() == \"foo content\"\n\n\ndef test_cache_type_is_properly_overridden(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        with erepo_dir.dvc.config.edit() as conf:\n            conf[\"cache\"][\"type\"] = \"symlink\"\n        erepo_dir.dvc.cache = CacheManager(erepo_dir.dvc)\n        erepo_dir.scm_add(\n            [erepo_dir.dvc.config.files[\"repo\"]],\n            \"set source repo cache type to symlink\",\n        )\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", \"create foo\")\n    assert system.is_symlink(erepo_dir / \"foo\")\n\n    dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\")\n\n    assert not system.is_symlink(\"foo_imported\")\n    assert (tmp_dir / \"foo_imported\").read_text() == \"foo content\"\n    assert scm.is_ignored(\"foo_imported\")\n\n\n@pytest.mark.parametrize(\"dirpath\", [\"dir\", \"dir/\"])\ndef test_pull_imported_directory_stage(tmp_dir, dvc, erepo_dir, dirpath):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir\": {\"foo\": \"foo content\"}}, commit=\"create dir\")\n\n    stage = dvc.imp(os.fspath(erepo_dir), dirpath, \"dir_imported\")\n\n    remove(\"dir_imported\")\n    dvc.cache.local.clear()\n\n    dvc.pull([\"dir_imported.dvc\"])\n\n    assert (tmp_dir / \"dir_imported\").read_text() == {\"foo\": \"foo content\"}\n    assert stage.deps[0].fs_path == \"dir\"\n\n\ndef test_pull_wildcard_imported_directory_stage(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir123\": {\"foo\": \"foo content\"}}, commit=\"create dir\")\n\n    dvc.imp(os.fspath(erepo_dir), \"dir123\", \"dir_imported123\")\n\n    remove(\"dir_imported123\")\n    dvc.cache.local.clear()\n\n    dvc.pull([\"dir_imported*.dvc\"], glob=True)\n\n    assert (tmp_dir / \"dir_imported123\").read_text() == 
{\"foo\": \"foo content\"}\n\n\ndef test_push_wildcard_from_bare_git_repo(\n    tmp_dir, make_tmp_dir, erepo_dir, local_cloud\n):\n    Git.init(tmp_dir.fs_path, bare=True).close()\n\n    erepo_dir.add_remote(config=local_cloud.config)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\n            {\n                \"dir123\": {\"foo\": \"foo content\"},\n                \"dirextra\": {\"extrafoo\": \"extra foo content\"},\n            },\n            commit=\"initial\",\n        )\n    erepo_dir.dvc.push([os.path.join(os.fspath(erepo_dir), \"dire*\")], glob=True)\n\n    git_remote_add(erepo_dir, \"origin\", os.fspath(tmp_dir))\n    git_push(erepo_dir, \"origin\")\n\n    dvc_repo = make_tmp_dir(\"dvc-repo\", scm=True, dvc=True)\n    with dvc_repo.chdir():\n        dvc_repo.dvc.imp(os.fspath(tmp_dir), \"dirextra\")\n\n        with pytest.raises(DataIndexDirError):\n            dvc_repo.dvc.imp(os.fspath(tmp_dir), \"dir123\")\n\n\n@pytest.mark.parametrize(\"dname\", [\".\", \"dir\", \"dir/subdir\"])\ndef test_import_to_dir(dname, tmp_dir, dvc, erepo_dir):\n    os.makedirs(dname, exist_ok=True)\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo\")\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"foo\", dname)\n\n    dst = os.path.join(dname, \"foo\")\n\n    assert stage.outs[0].fspath == os.path.abspath(dst)\n    assert os.path.isdir(dname)\n    assert (tmp_dir / dst).read_text() == \"foo content\"\n\n\ndef test_pull_non_workspace(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"master content\", commit=\"create foo\")\n\n        with erepo_dir.branch(\"branch\", new=True):\n            erepo_dir.dvc_gen(\"foo\", \"branch content\", commit=\"modify foo\")\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\", rev=\"branch\")\n    tmp_dir.scm_add([stage.relpath], commit=\"imported branch\")\n    scm.tag(\"ref-to-branch\")\n\n    # Overwrite via 
import\n    (tmp_dir / \"foo_imported\").unlink()\n    dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\", rev=\"master\")\n\n    remove(stage.outs[0].cache_path)\n    dvc.fetch(all_tags=True)\n    assert os.path.exists(stage.outs[0].cache_path)\n\n\ndef test_import_non_existing(erepo_dir, tmp_dir, dvc):\n    with pytest.raises(FileNotFoundError):\n        tmp_dir.dvc.imp(os.fspath(erepo_dir), \"invalid_output\")\n\n    # https://github.com/treeverse/dvc/pull/2837#discussion_r352123053\n    with pytest.raises(FileNotFoundError):\n        tmp_dir.dvc.imp(os.fspath(erepo_dir), \"/root/\", \"root\")\n\n\ndef test_pull_no_rev_lock(erepo_dir, tmp_dir, dvc):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"contents\", commit=\"create foo\")\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"foo\", \"foo_imported\")\n    assert \"rev\" not in stage.deps[0].def_repo\n    stage.deps[0].def_repo.pop(\"rev_lock\")\n\n    load_file(dvc, stage.path).dump(stage)\n\n    remove(stage.outs[0].cache_path)\n    (tmp_dir / \"foo_imported\").unlink()\n\n    dvc.pull([stage.path])\n\n    assert (tmp_dir / \"foo_imported\").is_file()\n    assert (tmp_dir / \"foo_imported\").read_text() == \"contents\"\n\n\ndef test_import_from_bare_git_repo(tmp_dir, make_tmp_dir, erepo_dir, local_cloud):\n    Git.init(tmp_dir.fs_path, bare=True).close()\n\n    erepo_dir.add_remote(config=local_cloud.config)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"initial\")\n    erepo_dir.dvc.push()\n\n    git_remote_add(erepo_dir, \"origin\", os.fspath(tmp_dir))\n    git_push(erepo_dir, \"origin\", \"master\")\n\n    dvc_repo = make_tmp_dir(\"dvc-repo\", scm=True, dvc=True)\n    with dvc_repo.chdir():\n        dvc_repo.dvc.imp(os.fspath(tmp_dir), \"foo\")\n\n\ndef test_import_pipeline_tracked_outs(\n    tmp_dir, dvc, scm, erepo_dir, run_copy, local_remote\n):\n    from dvc.dvcfile import LOCK_FILE, PROJECT_FILE\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    
run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    dvc.push()\n\n    dvc.scm.add([PROJECT_FILE, LOCK_FILE])\n    dvc.scm.commit(\"add pipeline stage\")\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc.imp(f\"file://{tmp_dir.as_posix()}\", \"bar\", out=\"baz\")\n        assert (erepo_dir / \"baz\").read_text() == \"foo\"\n\n\ndef test_local_import(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\"foo\", \"foo\", commit=\"init\")\n    (tmp_dir / \"outdir\").mkdir()\n    dvc.imp(\".\", \"foo\", out=\"outdir\")\n\n\ndef test_import_mixed_dir(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(os.path.join(\"dir\", \"foo\"), \"foo\", commit=\"foo\")\n        erepo_dir.scm_gen(os.path.join(\"dir\", \"bar\"), \"bar\", commit=\"bar\")\n\n    dvc.imp(os.fspath(erepo_dir), \"dir\")\n    assert (tmp_dir / \"dir\").read_text() == {\n        \".gitignore\": \"/foo\\n\",\n        \"foo\": \"foo\",\n        \"bar\": \"bar\",\n    }\n\n\n@pytest.mark.parametrize(\"is_dvc\", [True, False])\n@pytest.mark.parametrize(\"files\", [{\"foo\": \"foo\"}, {\"dir\": {\"bar\": \"bar\"}}])\ndef test_import_subrepos(tmp_dir, erepo_dir, dvc, scm, is_dvc, files):\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    gen = subrepo.dvc_gen if is_dvc else subrepo.scm_gen\n    with subrepo.chdir():\n        gen(files, commit=\"add files in subrepo\")\n\n    key = next(iter(files))\n    path = str((subrepo / key).relative_to(erepo_dir))\n\n    stage = dvc.imp(os.fspath(erepo_dir), path, out=\"out\")\n\n    assert (tmp_dir / \"out\").read_text() == files[key]\n    assert stage.deps[0].def_path == path\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n\n\ndef test_granular_import_from_subrepos(tmp_dir, dvc, erepo_dir):\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    with subrepo.chdir():\n        
subrepo.dvc_gen({\"dir\": {\"bar\": \"bar\"}}, commit=\"files in subrepo\")\n\n    path = os.path.join(\"subrepo\", \"dir\", \"bar\")\n    stage = dvc.imp(os.fspath(erepo_dir), path, out=\"out\")\n    assert (tmp_dir / \"out\").read_text() == \"bar\"\n    assert stage.deps[0].def_path == path\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n\n\n@pytest.mark.parametrize(\"is_dvc\", [True, False])\n@pytest.mark.parametrize(\"files\", [{\"foo\": \"foo\"}, {\"dir\": {\"bar\": \"bar\"}}])\ndef test_pull_imported_stage_from_subrepos(tmp_dir, dvc, erepo_dir, is_dvc, files):\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    gen = subrepo.dvc_gen if is_dvc else subrepo.scm_gen\n    with subrepo.chdir():\n        gen(files, commit=\"files in subrepo\")\n\n    key = first(files)\n    path = os.path.join(\"subrepo\", key)\n    dvc.imp(os.fspath(erepo_dir), path, out=\"out\")\n\n    # clean everything\n    dvc.cache.local.clear()\n    remove(\"out\")\n\n    stats = dvc.pull([\"out.dvc\"])\n\n    expected = [f\"out{os.sep}\"] if isinstance(files[key], dict) else [\"out\"]\n    assert stats[\"added\"] == expected\n    assert (tmp_dir / \"out\").read_text() == files[key]\n\n\ndef test_import_complete_repo(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"add foo\")\n\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"dir\": {\"bar\": \"bar\"}}, commit=\"files in subrepo\")\n\n    dvc.imp(os.fspath(erepo_dir), \"subrepo\", out=\"out_sub\")\n    assert (tmp_dir / \"out_sub\").read_text() == {\n        \".gitignore\": \"/dir\\n\",\n        \"dir\": {\"bar\": \"bar\"},\n    }\n\n    dvc.imp(os.fspath(erepo_dir), os.curdir, out=\"out\")\n    assert (tmp_dir / \"out\").read_text() == {\".gitignore\": \"/foo\\n\", \"foo\": 
\"foo\"}\n\n\ndef test_import_with_no_exec(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo\")\n\n    dvc.imp(os.fspath(erepo_dir), \"foo\", out=\"foo_imported\", no_exec=True)\n\n    dst = tmp_dir / \"foo_imported\"\n    assert not dst.exists()\n\n\ndef test_import_with_jobs(mocker, dvc, erepo_dir):\n    import dvc_data.hashfile.transfer as otransfer\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\n            {\n                \"dir1\": {\n                    \"file1\": \"file1\",\n                    \"file2\": \"file2\",\n                    \"file3\": \"file3\",\n                    \"file4\": \"file4\",\n                }\n            },\n            commit=\"init\",\n        )\n\n    spy = mocker.spy(otransfer, \"transfer\")\n    dvc.imp(os.fspath(erepo_dir), \"dir1\", jobs=3)\n    # the first call will be retrieving dir cache for \"dir1\" w/jobs None\n    for _args, kwargs in spy.call_args_list[1:]:\n        assert kwargs.get(\"jobs\") == 3\n\n\ndef test_chained_import(tmp_dir, dvc, make_tmp_dir, erepo_dir, local_cloud):\n    erepo_dir.add_remote(config=local_cloud.config)\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, commit=\"init\")\n    erepo_dir.dvc.push()\n    remove(erepo_dir.dvc.cache.local.path)\n    remove(os.fspath(erepo_dir / \"dir\"))\n\n    erepo2 = make_tmp_dir(\"erepo2\", scm=True, dvc=True)\n    with erepo2.chdir():\n        erepo2.dvc.imp(os.fspath(erepo_dir), \"dir\")\n        erepo2.scm.add(\"dir.dvc\")\n        erepo2.scm.commit(\"import\")\n    remove(erepo2.dvc.cache.local.path)\n    remove(os.fspath(erepo2 / \"dir\"))\n\n    dvc.imp(os.fspath(erepo2), \"dir\", \"dir_imported\")\n    dst = tmp_dir / \"dir_imported\"\n    assert (dst / \"foo\").read_text() == \"foo\"\n    assert (dst / \"bar\").read_text() == \"bar\"\n\n    dvc.cache.local.clear()\n    remove(\"dir_imported\")\n\n    # 
pulled objects should come from the original upstream repo's remote,\n    # no cache or remote should be needed from the intermediate repo\n    dvc.pull([\"dir_imported.dvc\"])\n    assert not os.path.exists(erepo_dir.dvc.cache.local.path)\n    assert not os.path.exists(erepo2.dvc.cache.local.path)\n    assert (dst / \"foo\").read_text() == \"foo\"\n    assert (dst / \"bar\").read_text() == \"bar\"\n\n\n@pytest.mark.parametrize(\"paths\", [[], [\"dir\"]])\ndef test_parameterized_repo(tmp_dir, dvc, scm, erepo_dir, paths):\n    path = erepo_dir.joinpath(*paths)\n    path.mkdir(parents=True, exist_ok=True)\n    (path / \"params.yaml\").dump({\"out\": \"foo\"})\n    (path / \"dvc.yaml\").dump(\n        {\n            \"stages\": {\n                \"train\": {\"cmd\": \"echo ${out} > ${out}\", \"outs\": [\"${out}\"]},\n            }\n        }\n    )\n    path.gen({\"foo\": \"foo\"})\n    with path.chdir():\n        erepo_dir.dvc.commit(None, force=True)\n        erepo_dir.scm.add_commit(\n            [\"params.yaml\", \"dvc.yaml\", \"dvc.lock\", \".gitignore\"],\n            message=\"init\",\n        )\n\n    to_import = os.path.join(*paths, \"foo\")\n    stage = dvc.imp(os.fspath(erepo_dir), to_import, \"foo_imported\")\n\n    assert (tmp_dir / \"foo_imported\").read_text() == \"foo\"\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n\n\n@pytest.mark.parametrize(\n    \"options, def_repo\",\n    [\n        ({\"config\": \"myconfig\"}, {\"config\": \"myconfig\"}),\n        ({\"remote\": \"myremote\"}, {\"remote\": \"myremote\"}),\n        ({\"remote_config\": {\"key\": \"value\"}}, {\"remote\": {\"key\": \"value\"}}),\n        (\n            {\n                \"remote\": \"myremote\",\n                \"remote_config\": {\"key\": \"value\"},\n            },\n            {\n                \"config\": {\n                    \"core\": {\"remote\": \"myremote\"},\n                    
\"remote\": {\n                        \"myremote\": {\"key\": \"value\"},\n                    },\n                },\n            },\n        ),\n        (\n            {\n                \"remote\": \"myremote\",\n                \"remote_config\": {\"key\": \"value\"},\n                \"config\": {\"otherkey\": \"othervalue\"},\n            },\n            {\n                \"config\": {\n                    \"core\": {\"remote\": \"myremote\"},\n                    \"remote\": {\n                        \"myremote\": {\"key\": \"value\"},\n                    },\n                    \"otherkey\": \"othervalue\",\n                },\n            },\n        ),\n    ],\n)\ndef test_import_configs(tmp_dir, scm, dvc, erepo_dir, options, def_repo):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo\")\n\n    (tmp_dir / \"myconfig\").touch()\n\n    stage = dvc.imp(\n        os.fspath(erepo_dir), \"foo\", \"foo_imported\", no_exec=True, **options\n    )\n    assert stage.deps[0].def_repo == {\"url\": os.fspath(erepo_dir), **def_repo}\n\n\ndef test_import_invalid_configs(tmp_dir, scm, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"foo\", \"foo content\", commit=\"create foo\")\n\n    with pytest.raises(\n        ValueError,\n        match=\"Can't specify config path together with both remote and remote_config\",\n    ):\n        dvc.imp(\n            os.fspath(erepo_dir),\n            \"foo\",\n            \"foo_imported\",\n            no_exec=True,\n            config=\"myconfig\",\n            remote=\"myremote\",\n            remote_config={\"key\": \"value\"},\n        )\n\n\n@pytest.mark.parametrize(\n    \"files,expected_info_calls\",\n    [\n        ({\"foo\": \"foo\"}, {(\"foo\",)}),\n        (\n            {\n                \"dir\": {\n                    \"bar\": \"bar\",\n                    \"subdir\": {\"lorem\": \"ipsum\", \"nested\": {\"lorem\": \"lorem\"}},\n             
   }\n            },\n            # info calls should be made for only directories\n            {(\"dir\",), (\"dir\", \"subdir\"), (\"dir\", \"subdir\", \"nested\")},\n        ),\n    ],\n)\ndef test_import_no_hash(\n    tmp_dir, scm, dvc, erepo_dir, mocker, files, expected_info_calls\n):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(files, commit=\"create foo\")\n\n    file_md5_spy = mocker.spy(_hash, \"file_md5\")\n    index_info_spy = mocker.spy(DataIndex, \"info\")\n    name = next(iter(files))\n\n    dvc.imp(os.fspath(erepo_dir), name, \"out\")\n\n    local_hashes = [\n        call.args[0]\n        for call in file_md5_spy.call_args_list\n        if call.args[1].protocol == \"local\"\n    ]\n    # no files should be hashed, should use existing metadata\n    assert not local_hashes\n    assert {\n        call.args[1] for call in index_info_spy.call_args_list\n    } == expected_info_calls\n"
  },
  {
    "path": "tests/func/test_import_db.py",
    "content": "import os\nimport sqlite3\nfrom contextlib import closing\n\nimport pandas as pd\nimport pytest\nfrom funcy import compact\n\nfrom dvc.testing import matchers as M\n\n\n@pytest.fixture\ndef db_path(tmp_dir):\n    return tmp_dir / \"main.db\"\n\n\n@pytest.fixture\ndef seed_db(db_path):\n    conn = sqlite3.connect(db_path)\n    conn.execute(\"CREATE TABLE model (id INTEGER PRIMARY KEY, value INTEGER)\")\n\n    def inner(values):\n        conn.executemany(\"INSERT INTO model(value) VALUES(?)\", [(i,) for i in values])\n        conn.commit()\n\n    with closing(conn):\n        yield inner\n\n\n@pytest.fixture\ndef db_connection(dvc, db_path):\n    with dvc.config.edit(level=\"local\") as conf:\n        conf[\"db\"] = {\"conn\": {\"url\": f\"sqlite:///{db_path.fs_path}\"}}\n    return \"conn\"\n\n\ndef load_data(file, output_format):\n    if output_format == \"json\":\n        return pd.read_json(file, orient=\"records\")\n    return pd.read_csv(file)\n\n\n@pytest.mark.parametrize(\"output_format\", [\"csv\", \"json\"])\n@pytest.mark.parametrize(\n    \"args,file_name\",\n    [\n        ({\"sql\": \"select * from model\"}, \"results\"),\n        ({\"table\": \"model\"}, \"model\"),\n    ],\n)\ndef test(tmp_dir, scm, dvc, db_connection, seed_db, output_format, args, file_name):\n    seed_db(values=range(5))\n    if output_format == \"json\":\n        file_size = 96, 192\n        md5 = \"6039fe7565d212b339aaa446ca234e5d\", \"e1b8adf4d9eb9ab2b64d3ab0bb5f65ac\"\n    elif os.name == \"nt\":\n        file_size = 35, 61\n        md5 = \"14c34db5ddd184345c06f74718539f04\", \"3bb836e6d43c9afa43a9d73b36bbbab4\"\n    else:\n        file_size = 29, 50\n        md5 = \"6f7fc0d701d1ac13eec83d79fffaf427\", \"c04f712f8167496a2fb43f289f2b7e28\"\n\n    db = compact(\n        {\n            \"file_format\": output_format,\n            \"connection\": db_connection,\n            \"table\": args.get(\"table\"),\n            \"query\": args.get(\"sql\"),\n        }\n    )\n   
 stage = dvc.imp_db(**args, connection=db_connection, output_format=output_format)\n\n    output_file = f\"{file_name}.{output_format}\"\n    df = load_data(output_file, output_format)\n    assert df.values.tolist() == [[i + 1, i] for i in range(5)]\n    assert (tmp_dir / stage.relpath).parse() == {\n        \"md5\": M.instance_of(str),\n        \"frozen\": True,\n        \"deps\": [{\"db\": db}],\n        \"outs\": [\n            {\n                \"md5\": md5[0],\n                \"size\": file_size[0],\n                \"hash\": \"md5\",\n                \"path\": output_file,\n            }\n        ],\n    }\n\n    seed_db(values=range(5, 10))\n\n    dvc.update(stage.addressing)\n\n    df = load_data(output_file, output_format)\n    assert df.values.tolist() == [[i + 1, i] for i in range(10)]\n    assert (tmp_dir / stage.relpath).parse() == {\n        \"md5\": M.instance_of(str),\n        \"frozen\": True,\n        \"deps\": [{\"db\": db}],\n        \"outs\": [\n            {\n                \"md5\": md5[1],\n                \"size\": file_size[1],\n                \"hash\": \"md5\",\n                \"path\": output_file,\n            }\n        ],\n    }\n"
  },
  {
    "path": "tests/func/test_import_url.py",
    "content": "import os\nimport textwrap\nfrom uuid import uuid4\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.dependency.base import Dependency, DependencyDoesNotExistError\nfrom dvc.dvcfile import load_file\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.stage import Stage\nfrom dvc.testing.workspace_tests import TestImport as _TestImport\nfrom tests.utils import get_gitignore_content\n\n\ndef test_cmd_import(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    ret = main([\"import-url\", \"foo\", \"import\"])\n    assert ret == 0\n    assert os.path.exists(\"import.dvc\")\n\n    ret = main([\"import-url\", \"non-existing-file\", \"import\"])\n    assert ret != 0\n\n\ndef test_cmd_unsupported_scheme(dvc):\n    ret = main([\"import-url\", \"unsupported://path\", \"import_unsupported\"])\n    assert ret != 0\n\n\ndef test_default_output(tmp_dir, dvc, cloud):\n    filename = str(uuid4())\n    tmpfile = cloud / filename\n    tmpfile.write_bytes(b\"content\")\n    cloud.gen(filename, \"content\")\n\n    ret = main([\"import-url\", tmpfile.fs_path])\n    assert ret == 0\n    assert (tmp_dir / filename).read_bytes() == b\"content\"\n\n\ndef test_should_remove_outs_before_import(tmp_dir, dvc, mocker, erepo_dir):\n    erepo_dir.gen({\"foo\": \"foo\"})\n\n    remove_outs_call_counter = mocker.spy(Stage, \"remove_outs\")\n    ret = main([\"import-url\", os.fspath(erepo_dir / \"foo\")])\n\n    assert ret == 0\n    assert remove_outs_call_counter.mock.call_count == 1\n\n\ndef test_import_conflict_and_override(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    tmp_dir.gen(\"bar\", \"bar\")\n\n    # bar exists, fail\n    ret = main([\"import-url\", \"foo\", \"bar\"])\n    assert ret != 0\n    assert not os.path.exists(\"bar.dvc\")\n\n    # force override\n    ret = main([\"import-url\", \"foo\", \"bar\", \"--force\"])\n    assert ret == 0\n    assert os.path.exists(\"bar.dvc\")\n\n\n@pytest.mark.parametrize(\"dname\", [\".\", \"dir\", 
\"dir/subdir\"])\ndef test_import_url_to_dir(dname, tmp_dir, dvc):\n    tmp_dir.gen({\"data_dir\": {\"file\": \"file content\"}})\n    src = os.path.join(\"data_dir\", \"file\")\n\n    os.makedirs(dname, exist_ok=True)\n\n    stage = dvc.imp_url(src, dname)\n\n    dst = tmp_dir / dname / \"file\"\n\n    assert stage.outs[0].fs_path == os.fspath(dst)\n    assert os.path.isdir(dname)\n    assert dst.read_text() == \"file content\"\n\n\ndef test_import_stage_accompanies_target(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file1\", \"file1 content\", commit=\"commit file\")\n\n    tmp_dir.gen({\"dir\": {}})\n    erepo = {\"url\": os.fspath(erepo_dir)}\n    dvc.imp_url(\"file1\", out=os.path.join(\"dir\", \"imported_file\"), erepo=erepo)\n\n    assert (tmp_dir / \"dir\" / \"imported_file\").exists()\n    assert (tmp_dir / \"dir\" / \"imported_file.dvc\").exists()\n\n\ndef test_import_url_nonexistent(dvc, erepo_dir):\n    with pytest.raises(DependencyDoesNotExistError):\n        dvc.imp_url(os.fspath(erepo_dir / \"non-existent\"))\n\n\ndef test_import_url_with_no_exec(tmp_dir, dvc, erepo_dir):\n    tmp_dir.gen({\"data_dir\": {\"file\": \"file content\"}})\n    src = os.path.join(\"data_dir\", \"file\")\n\n    dvc.imp_url(src, \".\", no_exec=True)\n    dst = tmp_dir / \"file\"\n    assert not dst.exists()\n\n\nclass TestImport(_TestImport):\n    @pytest.fixture\n    def stage_md5(self):\n        return \"7033ee831f78a4dfec2fc71405516067\"\n\n    @pytest.fixture\n    def dir_md5(self):\n        return \"b6dcab6ccd17ca0a8bf4a215a37d14cc.dir\"\n\n    @pytest.fixture\n    def is_object_storage(self):\n        return False\n\n\ndef test_import_url_preserve_fields(tmp_dir, dvc):\n    text = textwrap.dedent(\n        \"\"\"\\\n        # top comment\n        desc: top desc\n        deps:\n        - path: foo # dep comment\n        outs:\n        - path: bar # out comment\n          desc: out desc\n          type: mytype\n          labels:\n   
       - label1\n          - label2\n          meta:\n            key: value\n        meta: some metadata\n    \"\"\"\n    )\n    tmp_dir.gen(\"bar.dvc\", text)\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.imp_url(\"foo\", out=\"bar\")\n    assert (tmp_dir / \"bar.dvc\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        # top comment\n        desc: top desc\n        deps:\n        - path: foo # dep comment\n          md5: acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n          hash: md5\n        outs:\n        - path: bar # out comment\n          desc: out desc\n          type: mytype\n          labels:\n          - label1\n          - label2\n          meta:\n            key: value\n          md5: acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n          hash: md5\n        meta: some metadata\n        md5: 8fc199641730e3f512deac0bd9a0e0b6\n        frozen: true\n    \"\"\"\n    )\n\n\ndef test_import_url_to_remote_absolute(tmp_dir, make_tmp_dir, dvc, scm, local_remote):\n    tmp_abs_dir = make_tmp_dir(\"abs\")\n    tmp_foo = tmp_abs_dir / \"foo\"\n    tmp_foo.write_text(\"foo\")\n\n    stage = dvc.imp_url(str(tmp_foo), to_remote=True)\n\n    foo = tmp_dir / \"foo\"\n    assert stage.deps[0].fspath == str(tmp_foo)\n    assert stage.outs[0].fspath == os.fspath(foo)\n    assert foo.with_suffix(\".dvc\").exists()\n    assert get_gitignore_content() == [\"/foo\"]\n\n\ndef test_import_url_to_remote_invalid_combinations(dvc):\n    with pytest.raises(InvalidArgumentError, match=\"--no-exec\"):\n        dvc.imp_url(\"s3://bucket/foo\", no_exec=True, to_remote=True)\n\n\ndef test_import_url_to_remote_status(tmp_dir, dvc, local_cloud, local_remote):\n    local_cloud.gen(\"foo\", \"foo\")\n\n    stage = dvc.imp_url(str(local_cloud / \"foo\"), to_remote=True)\n    assert stage.md5 is not None\n\n    status = dvc.status()\n    assert status[\"foo.dvc\"] == [{\"changed outs\": {\"foo\": \"not in cache\"}}]\n\n    dvc.pull()\n\n    status = dvc.status()\n    
assert len(status) == 0\n\n\ndef test_import_url_no_download(tmp_dir, scm, dvc, local_workspace):\n    local_workspace.gen(\"file\", \"file content\")\n    dst = tmp_dir / \"file\"\n    stage = dvc.imp_url(\"remote://workspace/file\", os.fspath(dst), no_download=True)\n\n    assert stage.deps[0].hash_info.value == \"d10b4c3ff123b26dc068d43a8bef2d23\"\n\n    assert not dst.exists()\n    assert scm.is_ignored(dst)\n\n    out = stage.outs[0]\n    assert not out.hash_info\n    assert out.meta.size is None\n\n    status = dvc.status()\n    assert status[\"file.dvc\"] == [{\"changed outs\": {\"file\": \"deleted\"}}]\n\n\ndef test_partial_import_pull(tmp_dir, scm, dvc, local_workspace):\n    local_workspace.gen(\"file\", \"file content\")\n    dst = tmp_dir / \"file\"\n    dvc.imp_url(\"remote://workspace/file\", os.fspath(dst), no_download=True)\n\n    dvc.pull([\"file.dvc\"])\n\n    assert dst.exists()\n\n    dvc.commit(force=True)\n\n    stage = load_file(dvc, \"file.dvc\").stage\n    assert stage.outs[0].hash_info.value == \"d10b4c3ff123b26dc068d43a8bef2d23\"\n    assert stage.outs[0].meta.size == 12\n\n\ndef test_import_url_fs_config(tmp_dir, dvc, workspace, mocker):\n    import dvc.fs as dvc_fs\n\n    workspace.gen(\"foo\", \"foo\")\n\n    url = \"remote://workspace/foo\"\n    get_fs_config = mocker.spy(dvc_fs, \"get_fs_config\")\n    dep_init = mocker.spy(Dependency, \"__init__\")\n    dvc.imp_url(url, fs_config={\"jobs\": 42})\n\n    stage = load_file(dvc, \"foo.dvc\").stage\n    assert stage.deps[0].def_fs_config == {\"jobs\": 42}\n\n    dep_init_kwargs = dep_init.call_args[1]\n    assert dep_init_kwargs.get(\"fs_config\") == {\"jobs\": 42}\n\n    assert get_fs_config.call_args_list[0][1] == {\"url\": \"foo\"}\n    assert get_fs_config.call_args_list[1][1] == {\"url\": url, \"jobs\": 42}\n    assert get_fs_config.call_args_list[2][1] == {\"name\": \"workspace\"}\n\n    dep_init.reset_mock()\n\n    dvc.pull(\"foo.dvc\")\n\n    dep_init_kwargs = 
dep_init.call_args[1]\n    assert dep_init_kwargs.get(\"fs_config\") == {\"jobs\": 42}\n"
  },
  {
    "path": "tests/func/test_init.py",
    "content": "import logging\nimport os\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.config import Config\nfrom dvc.exceptions import InitError\nfrom dvc.repo import Repo as DvcRepo\n\n\ndef test_api_init(scm):\n    DvcRepo.init().close()\n    assert os.path.isdir(DvcRepo.DVC_DIR)\n\n\ndef test_cli_init(scm):\n    ret = main([\"init\"])\n    assert ret == 0\n    assert os.path.isdir(DvcRepo.DVC_DIR)\n\n\ndef test_double_init(scm):\n    ret = main([\"init\"])\n    assert ret == 0\n    assert os.path.isdir(DvcRepo.DVC_DIR)\n\n    ret = main([\"init\"])\n    assert ret != 0\n    assert os.path.isdir(DvcRepo.DVC_DIR)\n\n    ret = main([\"init\", \"--force\"])\n    assert ret == 0\n    assert os.path.isdir(DvcRepo.DVC_DIR)\n\n\ndef test_init_no_scm_fail_api(tmp_dir):\n    with pytest.raises(InitError):\n        DvcRepo.init()\n\n\ndef test_init_no_scm_fail_cli(tmp_dir):\n    ret = main([\"init\"])\n    assert ret != 0\n\n\ndef test_init_no_scm_api(tmp_dir):\n    repo = DvcRepo.init(no_scm=True)\n\n    assert (tmp_dir / DvcRepo.DVC_DIR).is_dir()\n    assert repo.config[\"core\"][\"no_scm\"]\n\n\ndef test_init_no_scm_cli(tmp_dir):\n    ret = main([\"init\", \"--no-scm\"])\n    assert ret == 0\n\n    dvc_path = tmp_dir / DvcRepo.DVC_DIR\n    assert dvc_path.is_dir()\n    assert Config(os.fspath(dvc_path))[\"core\"][\"no_scm\"]\n\n\ndef test_init_quiet_should_not_display_welcome_screen(tmp_dir, scm, caplog):\n    with caplog.at_level(logging.INFO, logger=\"dvc\"):\n        ret = main([\"init\", \"--quiet\"])\n\n        assert ret == 0\n        assert not caplog.text\n\n\ndef test_allow_init_dvc_subdir(tmp_dir, scm, monkeypatch):\n    tmp_dir.gen({\"subdir\": {}})\n\n    with monkeypatch.context() as m:\n        m.chdir(\"subdir\")\n        assert main([\"init\", \"--subdir\"]) == 0\n\n    repo = DvcRepo(\"subdir\")\n    assert repo.root_dir == os.fspath(tmp_dir / \"subdir\")\n    assert repo.scm.root_dir == os.fspath(tmp_dir)\n\n\ndef 
test_subdir_init_no_option(tmp_dir, scm, monkeypatch, caplog):\n    tmp_dir.gen({\"subdir\": {}})\n\n    caplog.clear()\n    with monkeypatch.context() as m:\n        m.chdir(\"subdir\")\n        with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n            assert main([\"init\"]) == 1\n\n    assert (\n        \"{} is not tracked by any supported SCM tool (e.g. Git). \"\n        \"Use `--no-scm` if you don't want to use any SCM or \"\n        \"`--subdir` if initializing inside a subdirectory of a parent SCM \"\n        \"repository.\".format(os.fspath(tmp_dir / \"subdir\"))\n        in caplog.text\n    )\n\n\ndef test_gen_dvcignore(tmp_dir):\n    DvcRepo.init(no_scm=True)\n    text = (\n        \"# Add patterns of files dvc should ignore, which could improve\\n\"\n        \"# the performance. Learn more at\\n\"\n        \"# https://dvc.org/doc/user-guide/dvcignore\\n\"\n    )\n    assert text == (tmp_dir / \".dvcignore\").read_text()\n\n\ndef test_init_when_ignored_by_git(tmp_dir, scm, caplog):\n    # https://github.com/treeverse/dvc/issues/3738\n    tmp_dir.gen({\".gitignore\": \".*\"})\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert main([\"init\"]) == 1\n    assert (\n        f\"{tmp_dir / DvcRepo.DVC_DIR} is ignored by your SCM tool. \\n\"\n        \"Make sure that it's tracked, \"\n        \"for example, by adding '!.dvc' to .gitignore.\" in caplog.text\n    )\n"
  },
  {
    "path": "tests/func/test_install.py",
    "content": "import os\nimport pathlib\nimport sys\n\nimport pytest\nfrom dulwich.porcelain import remote_add as git_remote_add\n\nfrom dvc.exceptions import DvcException\nfrom dvc_data.hashfile.hash import file_md5\nfrom tests.func.parsing.test_errors import escape_ansi\n\ngit = pytest.importorskip(\"git\")\n\n\n@pytest.mark.skipif(\n    sys.platform == \"win32\", reason=\"Git hooks aren't supported on Windows\"\n)\nclass TestInstall:\n    def _hook(self, name):\n        return pathlib.Path(\".git\") / \"hooks\" / name\n\n    def test_create_hooks(self, scm, dvc):\n        dvc.install()\n\n        hooks_with_commands = [\n            (\"post-checkout\", \"exec dvc git-hook post-checkout\"),\n            (\"pre-commit\", \"exec dvc git-hook pre-commit\"),\n            (\"pre-push\", \"exec dvc git-hook pre-push\"),\n        ]\n\n        for fname, command in hooks_with_commands:\n            hook_path = self._hook(fname)\n            assert hook_path.is_file()\n            assert command in hook_path.read_text()\n\n    def test_install_pre_commit_tool(self, scm, dvc):\n        dvc.install(use_pre_commit_tool=True)\n\n        precommit_path = pathlib.Path(\".\") / \".pre-commit-config.yaml\"\n        assert precommit_path.is_file()\n\n    def test_fail_if_hook_exists(self, scm, dvc):\n        self._hook(\"post-checkout\").write_text(\"hook content\")\n\n        with pytest.raises(DvcException) as exc_info:  # noqa: PT011\n            dvc.install()\n\n        assert (\n            escape_ansi(str(exc_info.value)) == \"Hook 'post-checkout' already exists. 
\"\n            \"Please refer to <https://man.dvc.org/install> for more info.\"\n        )\n\n    def test_pre_commit_hook(self, tmp_dir, scm, dvc, caplog):\n        tmp_dir.dvc_gen(\"file\", \"file content\", commit=\"create foo\")\n        tmp_dir.gen(\"file\", \"file modified\")\n        dvc.install()\n\n        # scm.commit bypasses hooks\n        with pytest.raises(git.GitCommandError, match=r\"modified:\\s*file\"):\n            scm.gitpython.repo.git.commit(m=\"file modified\")\n\n    def test_post_checkout(self, tmp_dir, scm, dvc):\n        tmp_dir.dvc_gen({\"file\": \"file content\"}, commit=\"add\")\n        os.unlink(\"file\")\n        dvc.install()\n\n        scm.gitpython.git.checkout(\"-b\", \"new_branch\")\n\n        assert os.path.isfile(\"file\")\n\n    def test_pre_push_hook(self, tmp_dir, scm, dvc, tmp_path_factory):\n        temp = tmp_path_factory.mktemp(\"external\")\n        git_remote = temp / \"project.git\"\n        storage_path = temp / \"dvc_storage\"\n\n        with dvc.config.edit() as conf:\n            conf[\"remote\"][\"store\"] = {\"url\": os.fspath(storage_path)}\n            conf[\"core\"][\"remote\"] = \"store\"\n        tmp_dir.dvc_gen(\"file\", \"file_content\", \"commit message\")\n\n        file_checksum = file_md5(\"file\", dvc.fs)\n        expected_storage_path = (\n            storage_path / \"files\" / \"md5\" / file_checksum[:2] / file_checksum[2:]\n        )\n\n        scm.clone(os.fspath(tmp_dir), os.fspath(git_remote))\n        git_remote_add(tmp_dir, \"origin\", os.fspath(git_remote))\n\n        dvc.install()\n\n        assert not expected_storage_path.is_file()\n        scm.gitpython.repo.git.push(\"origin\", \"master\")\n        assert expected_storage_path.is_file()\n        assert expected_storage_path.read_text() == \"file_content\"\n\n\n@pytest.mark.skipif(\n    sys.platform == \"win32\", reason=\"Git hooks aren't supported on Windows\"\n)\ndef test_merge_driver_no_ancestor(tmp_dir, scm, dvc):\n    with 
tmp_dir.branch(\"one\", new=True):\n        tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\"}}, commit=\"one: add data\")\n\n    scm.checkout(\"two\", create_new=True)\n    dvc.checkout()  # keep things in sync\n\n    tmp_dir.dvc_gen({\"data\": {\"bar\": \"bar\"}}, commit=\"two: add data\")\n\n    # installing hook only before merge, as it runs `dvc` commands which makes\n    # `checkouts` and `commits` above slower\n    dvc.install()\n    (tmp_dir / \".gitattributes\").write_text(\"*.dvc merge=dvc\")\n\n    scm.gitpython.repo.git.merge(\"one\", m=\"merged\", no_gpg_sign=True, no_signoff=True)\n\n    # NOTE: dvc shouldn't checkout automatically as it might take a long time\n    assert (tmp_dir / \"data\").read_text() == {\"bar\": \"bar\"}\n    assert (tmp_dir / \"data.dvc\").read_text() == (\n        \"outs:\\n\"\n        \"- md5: 5ea40360f5b4ec688df672a4db9c17d1.dir\\n\"\n        \"  size: 6\\n\"\n        \"  nfiles: 2\\n\"\n        \"  hash: md5\\n\"\n        \"  path: data\\n\"\n    )\n\n    dvc.checkout(\"data.dvc\")\n    assert (tmp_dir / \"data\").read_text() == {\"foo\": \"foo\", \"bar\": \"bar\"}\n\n\n@pytest.mark.skipif(\n    sys.platform == \"win32\", reason=\"Git hooks aren't supported on Windows\"\n)\ndef test_merge_driver(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"data\": {\"master\": \"master\"}}, commit=\"master: add data\")\n\n    with tmp_dir.branch(\"one\", new=True):\n        tmp_dir.dvc_gen({\"data\": {\"one\": \"one\"}}, commit=\"one: add data\")\n\n    scm.checkout(\"two\", create_new=True)\n    dvc.checkout()  # keep things in sync\n\n    tmp_dir.dvc_gen({\"data\": {\"two\": \"two\"}}, commit=\"two: add data\")\n\n    # installing hook only before merge, as it runs `dvc` commands on\n    # `checkouts` and `commits` which slows tests down\n    dvc.install()\n    (tmp_dir / \".gitattributes\").write_text(\"*.dvc merge=dvc\")\n\n    scm.gitpython.repo.git.merge(\"one\", m=\"merged\", no_gpg_sign=True, no_signoff=True)\n\n    # NOTE: dvc shouldn't 
checkout automatically as it might take a long time\n    assert (tmp_dir / \"data\").read_text() == {\"master\": \"master\", \"two\": \"two\"}\n    assert (tmp_dir / \"data.dvc\").read_text() == (\n        \"outs:\\n\"\n        \"- md5: 839ef9371606817569c1ee0e5f4ed233.dir\\n\"\n        \"  size: 12\\n\"\n        \"  nfiles: 3\\n\"\n        \"  hash: md5\\n\"\n        \"  path: data\\n\"\n    )\n\n    dvc.checkout(\"data.dvc\")\n    assert (tmp_dir / \"data\").read_text() == {\n        \"master\": \"master\",\n        \"one\": \"one\",\n        \"two\": \"two\",\n    }\n"
  },
  {
    "path": "tests/func/test_lock.py",
    "content": "import multiprocessing\nimport time\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.exceptions import DvcException\nfrom dvc.lock import Lock, LockError, make_lock\n\n\ndef test_with(tmp_dir, dvc, mocker):\n    # patching to speedup tests\n    mocker.patch(\"dvc.lock.DEFAULT_TIMEOUT\", 0.01)\n\n    lockfile = tmp_dir / dvc.tmp_dir / \"lock\"\n    with Lock(lockfile):\n        with pytest.raises(LockError), Lock(lockfile):\n            pass\n\n\ndef test_unlock_lock_failed(tmp_dir, dvc, request, mocker):\n    # patching to speedup tests\n    mocker.patch(\"dvc.lock.DEFAULT_TIMEOUT\", 0.01)\n\n    lockfile = tmp_dir / dvc.tmp_dir / \"lock\"\n    lock = Lock(lockfile)\n    lock_ext = Lock(lockfile)\n\n    # It's a common scenario now to have lock unlocked and locked back (e.g. in\n    # repro of a stage) in with. We should see LockError exception here.\n    with lock:\n        lock.unlock()\n        lock_ext.lock()  # imitate an external process had time to lock it\n        request.addfinalizer(lock_ext.unlock)\n        with pytest.raises(LockError):\n            lock.lock()\n\n\ndef test_unlock_unlocked_raises():\n    lock = Lock(\"lock\")\n    with pytest.raises(DvcException, match=\"Unlock called on an unlocked lock\"):\n        lock.unlock()\n\n\ndef test_cli(tmp_dir, dvc, mocker, caplog):\n    # patching to speedup tests\n    mocker.patch(\"dvc.lock.DEFAULT_TIMEOUT\", 0.01)\n\n    expected_error_msg = (\n        \"Unable to acquire lock. Most likely another DVC process is \"\n        \"running or was terminated abruptly. 
Check the page \"\n        \"<https://dvc.org/doc/user-guide/troubleshooting#lock-issue> \"\n        \"for other possible reasons and to learn how to resolve this.\"\n    )\n    with Lock(tmp_dir / dvc.tmp_dir / \"lock\"):\n        assert main([\"add\", \"foo\"]) == 1\n    assert expected_error_msg in caplog.text\n\n\ndef hold_lock_until_signaled(lockfile_path, result_queue, release_signal):\n    lock = make_lock(lockfile_path)\n    with lock:\n        result_queue.put(\"p1_acquired\")\n        release_signal.wait()\n    result_queue.put(\"p1_released\")\n\n\ndef try_lock_with_wait(lockfile_path, wait, result_queue):\n    result_queue.put(\"p2_starting\")\n    try:\n        lock = make_lock(lockfile_path, wait=wait)\n        with lock:\n            result_queue.put(\"p2_acquired\")\n    except LockError as e:\n        result_queue.put(f\"error: {e}\")\n    else:\n        result_queue.put(\"p2_released\")\n\n\ndef test_lock_waits_when_requested(request, tmp_path):\n    lockfile = tmp_path / \"lock\"\n\n    q: multiprocessing.Queue[str] = multiprocessing.Queue()\n    release_signal = multiprocessing.Event()\n    # Process 1 holds the lock until signaled to release it\n    p1 = multiprocessing.Process(\n        target=hold_lock_until_signaled, args=(lockfile, q, release_signal)\n    )\n    p2 = multiprocessing.Process(target=try_lock_with_wait, args=(lockfile, True, q))\n\n    p1.start()\n    request.addfinalizer(p1.kill)\n\n    assert q.get(timeout=4) == \"p1_acquired\"\n\n    # Process 2 will wait for the lock (should succeed)\n    p2.start()\n    request.addfinalizer(p2.kill)\n\n    assert q.get(timeout=4) == \"p2_starting\"\n    assert q.empty()\n\n    # sleep to ensure Process 2 is waiting for the lock\n    time.sleep(1)\n    release_signal.set()  # release the lock\n\n    p1.join(timeout=4)\n\n    events = [q.get(timeout=2), q.get(timeout=2), q.get(timeout=2)]\n    # we still can't be sure of the order of events.\n    assert \"p1_released\" in events\n    assert 
\"p2_acquired\" in events\n    assert \"p2_released\" in events\n\n    p2.join(timeout=1)\n\n    assert q.empty()\n"
  },
  {
    "path": "tests/func/test_lockfile.py",
    "content": "from collections import OrderedDict\nfrom operator import itemgetter\n\nfrom dvc.dvcfile import LOCK_FILE\nfrom dvc.stage.utils import split_params_deps\nfrom dvc.utils.fs import remove\nfrom dvc.utils.serialize import dumps_yaml, parse_yaml_for_update\nfrom tests.func.test_run import supported_params\n\nFS_STRUCTURE = {\n    \"foo\": \"bar\\nfoobar\",\n    \"bar\": \"foo\\nfoobar\",\n    \"foobar\": \"foobar\\nbar\",\n    \"params.yaml\": dumps_yaml(supported_params),\n    \"params2.yaml\": dumps_yaml(supported_params),\n}\n\n\ndef read_lock_file(file=LOCK_FILE):\n    with open(file, encoding=\"utf-8\") as f:\n        data = parse_yaml_for_update(f.read(), file)\n    assert isinstance(data, OrderedDict)\n    return data\n\n\ndef assert_eq_lockfile(previous, new):\n    for content in (previous, new):\n        assert isinstance(content, OrderedDict)\n\n    # if they both are OrderedDict, then `==` will also check for order\n    assert previous == new\n\n\ndef test_deps_outs_are_sorted_by_path(tmp_dir, dvc, run_head):\n    tmp_dir.gen(FS_STRUCTURE)\n    deps = [\"foo\", \"bar\", \"foobar\"]\n    run_head(*deps, name=\"copy-first-line\")\n\n    initial_content = read_lock_file()\n    lock = initial_content[\"stages\"][\"copy-first-line\"]\n\n    # lock stage key order:\n    assert list(lock.keys()) == [\"cmd\", \"deps\", \"outs\"]\n\n    # `path` key appear first and then the `md5`\n    assert all(\n        list(dep.keys()) == [\"path\", \"hash\", \"md5\", \"size\"] for dep in lock[\"deps\"]\n    )\n    assert all(\n        list(out.keys()) == [\"path\", \"hash\", \"md5\", \"size\"] for out in lock[\"outs\"]\n    )\n\n    # deps are always sorted by the file path naming\n    assert list(map(itemgetter(\"path\"), lock[\"deps\"])) == sorted(deps)\n\n    # outs are too\n    assert list(map(itemgetter(\"path\"), lock[\"outs\"])) == [\n        d + \"-1\" for d in sorted(deps)\n    ]\n\n\ndef test_order_is_preserved_when_pipeline_order_changes(tmp_dir, dvc, 
run_head):\n    tmp_dir.gen(FS_STRUCTURE)\n    deps = [\"foo\", \"bar\", \"foobar\"]\n    stage = run_head(*deps, name=\"copy-first-line\")\n\n    initial_content = read_lock_file()\n    # reverse order of stage.outs and dump to the pipeline file\n    # then, again change stage.deps and dump to the pipeline file\n    reversal = stage.outs.reverse, stage.deps.reverse\n    for reverse_items in reversal:\n        reverse_items()\n        stage.dvcfile._dump_pipeline_file(stage)\n\n        # we only changed the order, should not reproduce\n        assert not dvc.reproduce(stage.addressing)\n\n        new_lock_content = read_lock_file()\n        assert_eq_lockfile(new_lock_content, initial_content)\n\n        (tmp_dir / LOCK_FILE).unlink()\n        assert dvc.reproduce(stage.addressing) == [stage]\n        new_lock_content = read_lock_file()\n        assert_eq_lockfile(new_lock_content, initial_content)\n\n\ndef test_cmd_changes_other_orders_are_preserved(tmp_dir, dvc, run_head):\n    tmp_dir.gen(FS_STRUCTURE)\n    deps = [\"foo\", \"bar\", \"foobar\"]\n    stage = run_head(*deps, name=\"copy-first-line\")\n\n    initial_content = read_lock_file()\n    # let's change cmd in pipeline file\n    # it should only change \"cmd\", otherwise it should be\n    # structurally same as cmd\n    new_cmd = \"python head.py foo bar foobar\"\n    assert stage.cmd != new_cmd  # sanity check\n    stage.cmd = new_cmd\n    stage.dvcfile._dump_pipeline_file(stage)\n\n    initial_content[\"stages\"][\"copy-first-line\"][\"cmd\"] = stage.cmd\n\n    assert dvc.reproduce(stage.addressing) == [stage]\n\n    new_lock_content = read_lock_file()\n    assert_eq_lockfile(new_lock_content, initial_content)\n\n\ndef test_params_dump(tmp_dir, dvc, run_head):\n    tmp_dir.gen(FS_STRUCTURE)\n\n    stage = run_head(\n        \"foo\",\n        \"bar\",\n        \"foobar\",\n        name=\"copy-first-line\",\n        params=[\n            \"params2.yaml:answer,lists,name\",\n            
\"params.yaml:lists,floats,nested.nested1,nested.nested1.nested2\",\n        ],\n    )\n\n    initial_content = read_lock_file()\n    lock = initial_content[\"stages\"][\"copy-first-line\"]\n\n    # lock stage key order:\n    assert list(lock.keys()) == [\"cmd\", \"deps\", \"params\", \"outs\"]\n    assert list(lock[\"params\"].keys()) == [\"params.yaml\", \"params2.yaml\"]\n\n    # # params keys are always sorted by the name\n    assert list(lock[\"params\"][\"params.yaml\"].keys()) == [\n        \"floats\",\n        \"lists\",\n        \"nested.nested1\",\n        \"nested.nested1.nested2\",\n    ]\n    assert list(lock[\"params\"][\"params2.yaml\"]) == [\"answer\", \"lists\", \"name\"]\n\n    assert not dvc.reproduce(stage.addressing)\n\n    # let's change the order of params and dump them in pipeline file\n    params, _ = split_params_deps(stage)\n    for param in params:\n        param.params.reverse()\n\n    stage.dvcfile._dump_pipeline_file(stage)\n    assert not dvc.reproduce(stage.addressing)\n\n    (tmp_dir / LOCK_FILE).unlink()\n    assert dvc.reproduce(stage.addressing) == [stage]\n    assert_eq_lockfile(initial_content, read_lock_file())\n\n    # remove build-cache and check if the same structure is built\n    for item in [dvc.stage_cache.cache_dir, LOCK_FILE]:\n        remove(item)\n    assert dvc.reproduce(stage.addressing) == [stage]\n    assert_eq_lockfile(initial_content, read_lock_file())\n"
  },
  {
    "path": "tests/func/test_ls.py",
    "content": "import os\nimport shutil\nimport textwrap\nfrom operator import itemgetter\nfrom os.path import join\n\nimport pytest\n\nfrom dvc.fs import MemoryFileSystem\nfrom dvc.repo import Repo\nfrom dvc.repo.ls import _ls_tree, ls_tree\nfrom dvc.scm import CloneError\nfrom dvc.testing import matchers as M\n\nFS_STRUCTURE = {\n    \"README.md\": \"content\",\n    \"model/script.py\": \"content\",\n    \"model/train.py\": \"content\",\n    \".gitignore\": \"content\",\n}\n\nDVC_STRUCTURE = {\n    \"structure.xml\": \"content\",\n    \"data/subcontent/data.xml\": \"content\",\n    \"data/subcontent/statistics/data.csv\": \"content\",\n    \"model/people.csv\": \"content\",\n}\n\n\ndef match_files(files, expected_files):\n    left = {(f[\"path\"], f[\"isout\"]) for f in files}\n    right = {(os.path.join(*args), isout) for (args, isout) in expected_files}\n    assert left == right\n\n\ndef create_dvc_pipeline(tmp_dir, dvc):\n    script = textwrap.dedent(\n        \"\"\"\\\n        import os, sys\n        f = sys.argv[1]\n        os.makedirs(os.path.dirname(f))\n        open(f, \"w+\").close()\n    \"\"\"\n    )\n    tmp_dir.scm_gen({\"script.py\": script}, commit=\"init\")\n    tmp_dir.dvc_gen({\"dep\": \"content\"}, commit=\"init dvc\")\n    dvc.run(\n        cmd=\"python script.py {}\".format(os.path.join(\"out\", \"file\")),\n        outs=[os.path.join(\"out\", \"file\")],\n        deps=[\"dep\"],\n        name=\"touch\",\n    )\n    tmp_dir.scm_add([\"dvc.yaml\", \"dvc.lock\"], commit=\"run\")\n    shutil.rmtree(\"out\")\n\n\ndef test_ls_repo(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = Repo.ls(os.fspath(tmp_dir))\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((\"structure.xml.dvc\",), False),\n            ((\"model\",), False),\n     
       ((\"data\",), False),\n            ((\"structure.xml\",), True),\n        ),\n    )\n\n\ndef test_ls_repo_recursive(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = Repo.ls(os.fspath(tmp_dir), recursive=True)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((\"structure.xml.dvc\",), False),\n            ((\"model\", \"script.py\"), False),\n            ((\"model\", \"train.py\"), False),\n            ((\"model\", \"people.csv.dvc\"), False),\n            ((\"data\", \"subcontent\", \"data.xml.dvc\"), False),\n            ((\"data\", \"subcontent\", \"statistics\", \"data.csv.dvc\"), False),\n            ((\"data\", \"subcontent\", \"statistics\", \"data.csv\"), True),\n            ((\"data\", \"subcontent\", \"statistics\", \".gitignore\"), False),\n            ((\"data\", \"subcontent\", \"data.xml\"), True),\n            ((\"data\", \"subcontent\", \".gitignore\"), False),\n            ((\"model\", \"people.csv\"), True),\n            ((\"model\", \".gitignore\"), False),\n            ((\"structure.xml\",), True),\n        ),\n    )\n\n\ndef test_ls_repo_dvc_only_recursive(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = Repo.ls(os.fspath(tmp_dir), recursive=True, dvc_only=True)\n    match_files(\n        files,\n        (\n            ((\"data\", \"subcontent\", \"statistics\", \"data.csv\"), True),\n            ((\"data\", \"subcontent\", \"data.xml\"), True),\n            ((\"model\", \"people.csv\"), True),\n            ((\"structure.xml\",), True),\n        ),\n    )\n\n\ndef test_ls_repo_with_new_path_dir(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen({\"mysub\": {}}, commit=\"dvc\")\n    
tmp_dir.gen({\"mysub/sub\": {\"foo\": \"content\"}})\n\n    files = Repo.ls(os.fspath(tmp_dir), path=\"mysub/sub\")\n    match_files(files, (((\"foo\",), False),))\n\n\ndef test_ls_repo_with_path_dir(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = Repo.ls(os.fspath(tmp_dir), path=\"model\")\n    match_files(\n        files,\n        (\n            ((\"script.py\",), False),\n            ((\"train.py\",), False),\n            ((\"people.csv\",), True),\n            ((\"people.csv.dvc\",), False),\n            ((\".gitignore\",), False),\n        ),\n    )\n\n\ndef test_ls_repo_with_path_dir_dvc_only_empty(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n    tmp_dir.scm_gen({\"folder/.keep\": \"content\"}, commit=\"add .keep\")\n    tmp_dir.scm_gen({\"empty_scm_folder/\": {}}, commit=\"add scm empty\")\n    tmp_dir.dvc_gen({\"empty_dvc_folder\": {}}, commit=\"empty dvc folder\")\n\n    with pytest.raises(FileNotFoundError):\n        Repo.ls(os.fspath(tmp_dir), path=\"not_exist_folder\")\n\n    assert Repo.ls(os.fspath(tmp_dir), path=\"empty_scm_folder\") == []\n\n    assert Repo.ls(os.fspath(tmp_dir), path=\"folder\", dvc_only=True) == []\n\n    assert Repo.ls(os.fspath(tmp_dir), path=\"empty_dvc_folder\", dvc_only=True) == []\n\n\ndef test_ls_repo_with_path_subdir(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    path = os.path.join(\"data\", \"subcontent\")\n    files = Repo.ls(os.fspath(tmp_dir), path)\n    match_files(\n        files,\n        (\n            ((\"data.xml\",), True),\n            ((\"data.xml.dvc\",), False),\n            ((\"statistics\",), False),\n            ((\".gitignore\",), False),\n        ),\n    )\n\n\ndef test_ls_repo_with_path_subdir_dvc_only(tmp_dir, dvc, scm):\n    
tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    path = os.path.join(\"data\", \"subcontent\")\n    files = Repo.ls(os.fspath(tmp_dir), path, dvc_only=True)\n    match_files(files, (((\"data.xml\",), True), ((\"statistics\",), False)))\n\n\ndef test_ls_repo_with_path_subdir_dvc_only_recursive(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    path = os.path.join(\"data\", \"subcontent\")\n    files = Repo.ls(os.fspath(tmp_dir), path, dvc_only=True, recursive=True)\n    match_files(files, (((\"data.xml\",), True), ((\"statistics\", \"data.csv\"), True)))\n\n\ndef test_ls_repo_with_path_file_out(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    path = os.path.join(\"data\", \"subcontent\", \"data.xml\")\n    files = Repo.ls(os.fspath(tmp_dir), path)\n    match_files(files, (((\"data.xml\",), True),))\n\n\ndef test_ls_repo_with_file_path_fs(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    path = \"README.md\"\n    files = Repo.ls(os.fspath(tmp_dir), path, recursive=True)\n    match_files(files, (((\"README.md\",), False),))\n\n\ndef test_ls_repo_with_missed_path(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    with pytest.raises(FileNotFoundError):\n        Repo.ls(os.fspath(tmp_dir), path=\"missed_path\")\n\n\ndef test_ls_repo_with_missed_path_dvc_only(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    with pytest.raises(FileNotFoundError):\n        Repo.ls(\n            os.fspath(tmp_dir),\n            path=\"missed_path\",\n            recursive=True,\n            dvc_only=True,\n        )\n\n\ndef 
test_ls_repo_with_removed_dvc_dir(tmp_dir, dvc, scm):\n    create_dvc_pipeline(tmp_dir, dvc)\n\n    files = Repo.ls(os.fspath(tmp_dir))\n    match_files(\n        files,\n        (\n            ((\"script.py\",), False),\n            ((\"dep.dvc\",), False),\n            ((\"dvc.yaml\",), False),\n            ((\"dvc.lock\",), False),\n            ((\"dep\",), True),\n            ((\"out\",), False),\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n        ),\n    )\n\n\ndef test_ls_repo_with_removed_dvc_dir_recursive(tmp_dir, dvc, scm):\n    create_dvc_pipeline(tmp_dir, dvc)\n\n    files = Repo.ls(os.fspath(tmp_dir), recursive=True)\n    match_files(\n        files,\n        (\n            ((\"script.py\",), False),\n            ((\"dep.dvc\",), False),\n            ((\"dvc.yaml\",), False),\n            ((\"dvc.lock\",), False),\n            ((\"dep\",), True),\n            ((\"out\", \"file\"), True),\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n        ),\n    )\n\n\ndef test_ls_repo_with_removed_dvc_dir_with_path_dir(tmp_dir, dvc, scm):\n    create_dvc_pipeline(tmp_dir, dvc)\n\n    path = \"out\"\n    files = Repo.ls(os.fspath(tmp_dir), path)\n    match_files(files, (((\"file\",), True),))\n\n\ndef test_ls_repo_with_removed_dvc_dir_with_path_file(tmp_dir, dvc, scm):\n    create_dvc_pipeline(tmp_dir, dvc)\n\n    path = os.path.join(\"out\", \"file\")\n    files = Repo.ls(os.fspath(tmp_dir), path)\n    match_files(files, (((\"file\",), True),))\n\n\ndef test_ls_repo_with_rev(erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n        erepo_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    rev = erepo_dir.scm.list_all_commits()[1]\n    files = Repo.ls(os.fspath(erepo_dir), rev=rev)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), 
False),\n            ((\"model\",), False),\n        ),\n    )\n\n\ndef test_ls_remote_repo(erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n        erepo_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    url = f\"file://{erepo_dir.as_posix()}\"\n    files = Repo.ls(url)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((\"structure.xml.dvc\",), False),\n            ((\"model\",), False),\n            ((\"data\",), False),\n            ((\"structure.xml\",), True),\n        ),\n    )\n\n\ndef test_ls_remote_repo_recursive(erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n        erepo_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    url = f\"file://{erepo_dir.as_posix()}\"\n    files = Repo.ls(url, recursive=True)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((\"structure.xml.dvc\",), False),\n            ((\"model\", \"script.py\"), False),\n            ((\"model\", \"train.py\"), False),\n            ((\"model\", \"people.csv.dvc\"), False),\n            ((\"data\", \"subcontent\", \"data.xml.dvc\"), False),\n            ((\"data\", \"subcontent\", \"statistics\", \"data.csv.dvc\"), False),\n            ((\"data\", \"subcontent\", \"statistics\", \"data.csv\"), True),\n            ((\"data\", \"subcontent\", \"statistics\", \".gitignore\"), False),\n            ((\"data\", \"subcontent\", \"data.xml\"), True),\n            ((\"data\", \"subcontent\", \".gitignore\"), False),\n            ((\"model\", \"people.csv\"), True),\n            ((\"model\", \".gitignore\"), False),\n            ((\"structure.xml\",), True),\n        ),\n    )\n\n\ndef test_ls_remote_git_only_repo_recursive(git_dir):\n    with 
git_dir.chdir():\n        git_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n\n    url = f\"file://{git_dir.as_posix()}\"\n    files = Repo.ls(url, recursive=True)\n    match_files(\n        files,\n        (\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((\"model\", \"script.py\"), False),\n            ((\"model\", \"train.py\"), False),\n        ),\n    )\n\n\ndef test_ls_remote_repo_with_path_dir(erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n        erepo_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    url = f\"file://{erepo_dir.as_posix()}\"\n    path = \"model\"\n    files = Repo.ls(url, path)\n    match_files(\n        files,\n        (\n            ((\"script.py\",), False),\n            ((\"train.py\",), False),\n            ((\"people.csv\",), True),\n            ((\"people.csv.dvc\",), False),\n            ((\".gitignore\",), False),\n        ),\n    )\n\n\ndef test_ls_remote_repo_with_rev(erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n        erepo_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    rev = erepo_dir.scm.list_all_commits()[1]\n    url = f\"file://{erepo_dir.as_posix()}\"\n    files = Repo.ls(url, rev=rev)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((\"model\",), False),\n        ),\n    )\n\n\ndef test_ls_remote_repo_with_rev_recursive(erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n        erepo_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n\n    rev = erepo_dir.scm.list_all_commits()[1]\n    url = f\"file://{erepo_dir.as_posix()}\"\n    files = Repo.ls(url, rev=rev, recursive=True)\n    match_files(\n        files,\n        (\n            ((\"structure.xml.dvc\",), False),\n            ((\"model\", 
\"people.csv.dvc\"), False),\n            ((\"data\", \"subcontent\", \"data.xml.dvc\"), False),\n            ((\"data\", \"subcontent\", \"statistics\", \"data.csv.dvc\"), False),\n            ((\"data\", \"subcontent\", \"statistics\", \"data.csv\"), True),\n            ((\"data\", \"subcontent\", \"statistics\", \".gitignore\"), False),\n            ((\"data\", \"subcontent\", \"data.xml\"), True),\n            ((\"data\", \"subcontent\", \".gitignore\"), False),\n            ((\"model\", \"people.csv\"), True),\n            ((\"model\", \".gitignore\"), False),\n            ((\"structure.xml\",), True),\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n        ),\n    )\n\n\ndef test_ls_not_existed_url():\n    from time import time\n\n    dirname = \"__{}_{}\".format(\"not_existed\", time())\n    with pytest.raises(CloneError):\n        Repo.ls(dirname, recursive=True)\n\n\ndef test_ls_shows_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy):\n    from dvc.dvcfile import LOCK_FILE, PROJECT_FILE\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    dvc.scm.add([PROJECT_FILE, LOCK_FILE])\n    dvc.scm.commit(\"add pipeline stage\")\n\n    files = Repo.ls(os.curdir, dvc_only=True)\n    match_files(files, (((\"bar\",), True),))\n\n\ndef test_ls_granular(erepo_dir):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\n            {\n                \"dir\": {\n                    \"1\": \"1\",\n                    \"2\": \"2\",\n                    \"subdir\": {\"foo\": \"foo\", \"bar\": \"bar\"},\n                }\n            },\n            commit=\"create dir\",\n        )\n\n    entries = Repo.ls(os.fspath(erepo_dir), os.path.join(\"dir\", \"subdir\"))\n    assert entries == [\n        {\n            \"isout\": True,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \"bar\",\n            \"size\": 3,\n            \"md5\": 
\"37b51d194a7513e45b56f6524f2d51f2\",\n        },\n        {\n            \"isout\": True,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \"foo\",\n            \"size\": 3,\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n        },\n    ]\n\n    entries = Repo.ls(os.fspath(erepo_dir), \"dir\")\n    assert entries == [\n        {\n            \"isout\": True,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \"1\",\n            \"size\": 1,\n            \"md5\": \"c4ca4238a0b923820dcc509a6f75849b\",\n        },\n        {\n            \"isout\": True,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \"2\",\n            \"size\": 1,\n            \"md5\": \"c81e728d9d4c2f636f067f89cc14862c\",\n        },\n        {\n            \"isout\": True,\n            \"isdir\": True,\n            \"isexec\": False,\n            \"path\": \"subdir\",\n            \"size\": M.instance_of(int),\n            \"md5\": None,\n        },\n    ]\n\n\n@pytest.mark.parametrize(\"use_scm\", [True, False])\ndef test_ls_target(erepo_dir, use_scm):\n    with erepo_dir.chdir():\n        gen = erepo_dir.scm_gen if use_scm else erepo_dir.dvc_gen\n        gen(\n            {\n                \"dir\": {\n                    \"1\": \"1\",\n                    \"2\": \"2\",\n                    \"subdir\": {\"foo\": \"foo\", \"bar\": \"bar\"},\n                }\n            },\n            commit=\"create dir\",\n        )\n\n    isout = not use_scm\n\n    def _ls(path):\n        return Repo.ls(os.fspath(erepo_dir), path)\n\n    assert _ls(os.path.join(\"dir\", \"1\")) == [\n        {\n            \"isout\": isout,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \"1\",\n            \"size\": 1,\n            \"md5\": \"c4ca4238a0b923820dcc509a6f75849b\" if not use_scm else None,\n        }\n    ]\n    assert _ls(os.path.join(\"dir\", 
\"subdir\", \"foo\")) == [\n        {\n            \"isout\": isout,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \"foo\",\n            \"size\": 3,\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\" if not use_scm else None,\n        }\n    ]\n    assert _ls(os.path.join(\"dir\", \"subdir\")) == [\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": isout,\n            \"path\": \"bar\",\n            \"size\": 3,\n            \"md5\": \"37b51d194a7513e45b56f6524f2d51f2\" if not use_scm else None,\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": isout,\n            \"path\": \"foo\",\n            \"size\": 3,\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\" if not use_scm else None,\n        },\n    ]\n\n\n@pytest.mark.parametrize(\n    \"dvc_top_level, erepo_type\",\n    [\n        (True, \"erepo_dir\"),\n        (False, \"git_dir\"),\n    ],\n)\ndef test_subrepo(request, dvc_top_level, erepo_type):\n    from tests.func.test_get import make_subrepo\n\n    dvc_files = {\"foo.txt\": \"foo.txt\", \"dvc_dir\": {\"lorem\": \"lorem\"}}\n    scm_files = {\"bar.txt\": \"bar.txt\", \"scm_dir\": {\"ipsum\": \"ipsum\"}}\n\n    erepo = request.getfixturevalue(erepo_type)\n    subrepo = erepo / \"subrepo\"\n    make_subrepo(subrepo, erepo.scm)\n\n    for repo in [erepo, subrepo]:\n        with repo.chdir():\n            repo.scm_gen(scm_files, commit=f\"scm track for top {repo}\")\n            if hasattr(repo, \"dvc\"):\n                repo.dvc_gen(dvc_files, commit=f\"dvc track for {repo}\")\n\n    def _list_files(repo, path=None):\n        return set(map(itemgetter(\"path\"), Repo.ls(os.fspath(repo), path)))\n\n    extras = {\".dvcignore\", \".gitignore\"}\n    git_tracked_outputs = {\"bar.txt\", \"scm_dir\"}\n    dvc_files = {\"dvc_dir\", \"foo.txt\", \"foo.txt.dvc\", \"dvc_dir.dvc\"}\n    common_outputs = 
git_tracked_outputs | extras | dvc_files\n\n    top_level_outputs = common_outputs if dvc_top_level else git_tracked_outputs\n    assert _list_files(erepo) == top_level_outputs\n    assert _list_files(erepo, \"scm_dir\") == {\"ipsum\"}\n    if dvc_top_level:\n        assert _list_files(erepo, \"dvc_dir\") == {\"lorem\"}\n\n    assert _list_files(subrepo, \".\") == common_outputs\n    assert _list_files(subrepo, \"scm_dir\") == {\"ipsum\"}\n    assert _list_files(subrepo, \"dvc_dir\") == {\"lorem\"}\n\n\ndef test_broken_symlink(tmp_dir, dvc):\n    from dvc.fs import system\n\n    tmp_dir.gen(\"file\", \"content\")\n    system.symlink(\"file\", \"link\")\n\n    os.remove(\"file\")\n\n    entries = Repo.ls(os.fspath(tmp_dir))\n\n    assert entries == [\n        {\n            \"isout\": False,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \".dvcignore\",\n            \"size\": M.instance_of(int),\n            \"md5\": None,\n        },\n        {\n            \"isout\": False,\n            \"isdir\": False,\n            \"isexec\": False,\n            \"path\": \"link\",\n            \"size\": 0,\n            \"md5\": None,\n        },\n    ]\n\n\ndef test_ls_broken_dir(tmp_dir, dvc):\n    from dvc_data.index import DataIndexDirError\n\n    tmp_dir.dvc_gen(\n        {\n            \"broken\": {\"baz\": \"baz\"},\n        }\n    )\n\n    shutil.rmtree(tmp_dir / \"broken\")\n    dvc.cache.local.clear()\n\n    tmp_dir.dvc_gen(\n        {\n            \"foo\": \"foo\",\n            \"dir\": {\"bar\": \"bar\"},\n        }\n    )\n\n    entries = Repo.ls(os.fspath(tmp_dir))\n    assert entries == [\n        {\n            \"isdir\": False,\n            \"isexec\": False,\n            \"isout\": False,\n            \"path\": \".dvcignore\",\n            \"size\": M.instance_of(int),\n            \"md5\": None,\n        },\n        {\n            \"isdir\": True,\n            \"isexec\": False,\n            \"isout\": True,\n            
\"path\": \"broken\",\n            \"size\": 3,\n            \"md5\": \"630bd47b538d2a513c7d267d07e0bc44.dir\",\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": False,\n            \"isout\": False,\n            \"path\": \"broken.dvc\",\n            \"size\": M.instance_of(int),\n            \"md5\": None,\n        },\n        {\n            \"isdir\": True,\n            \"isexec\": False,\n            \"isout\": True,\n            \"path\": \"dir\",\n            \"size\": M.instance_of(int),\n            \"md5\": \"91aaa9bb58b657d623ef143b195a67e4.dir\",\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": False,\n            \"isout\": False,\n            \"path\": \"dir.dvc\",\n            \"size\": M.instance_of(int),\n            \"md5\": None,\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": False,\n            \"isout\": True,\n            \"path\": \"foo\",\n            \"size\": 3,\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": False,\n            \"isout\": False,\n            \"path\": \"foo.dvc\",\n            \"size\": M.instance_of(int),\n            \"md5\": None,\n        },\n    ]\n\n    with pytest.raises(DataIndexDirError):\n        Repo.ls(os.fspath(tmp_dir), \"broken\")\n\n    with pytest.raises(DataIndexDirError):\n        Repo.ls(os.fspath(tmp_dir), recursive=True)\n\n\ndef test_ls_maxdepth(tmp_dir, scm, dvc):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = Repo.ls(os.fspath(tmp_dir), \"structure.xml\", maxdepth=0, recursive=True)\n    match_files(files, (((\"structure.xml\",), True),))\n\n    files = Repo.ls(os.fspath(tmp_dir), maxdepth=0, recursive=True)\n    match_files(files, (((os.curdir,), False),))\n\n    files = Repo.ls(os.fspath(tmp_dir), maxdepth=1, recursive=True)\n    match_files(\n    
    files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((\"structure.xml.dvc\",), False),\n            ((\"model\",), False),\n            ((\"data\",), False),\n            ((\"structure.xml\",), True),\n        ),\n    )\n    files = Repo.ls(os.fspath(tmp_dir), maxdepth=2, recursive=True)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((join(\"data\", \"subcontent\"),), False),\n            ((join(\"model\", \".gitignore\"),), False),\n            ((join(\"model\", \"people.csv\"),), True),\n            ((join(\"model\", \"people.csv.dvc\"),), False),\n            ((join(\"model\", \"script.py\"),), False),\n            ((join(\"model\", \"train.py\"),), False),\n            ((\"structure.xml\",), True),\n            ((\"structure.xml.dvc\",), False),\n        ),\n    )\n\n    files = Repo.ls(os.fspath(tmp_dir), maxdepth=3, recursive=True)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((join(\"data\", \"subcontent\", \".gitignore\"),), False),\n            ((join(\"data\", \"subcontent\", \"data.xml\"),), True),\n            ((join(\"data\", \"subcontent\", \"data.xml.dvc\"),), False),\n            ((join(\"data\", \"subcontent\", \"statistics\"),), False),\n            ((join(\"model\", \".gitignore\"),), False),\n            ((join(\"model\", \"people.csv\"),), True),\n            ((join(\"model\", \"people.csv.dvc\"),), False),\n            ((join(\"model\", \"script.py\"),), False),\n            ((join(\"model\", \"train.py\"),), False),\n            ((join(\"structure.xml\"),), True),\n            ((join(\"structure.xml.dvc\"),), False),\n        ),\n    )\n\n    files = 
Repo.ls(os.fspath(tmp_dir), maxdepth=4, recursive=True)\n    match_files(\n        files,\n        (\n            ((\".dvcignore\",), False),\n            ((\".gitignore\",), False),\n            ((\"README.md\",), False),\n            ((join(\"data\", \"subcontent\", \".gitignore\"),), False),\n            ((join(\"data\", \"subcontent\", \"data.xml\"),), True),\n            ((join(\"data\", \"subcontent\", \"data.xml.dvc\"),), False),\n            ((join(\"data\", \"subcontent\", \"statistics\", \".gitignore\"),), False),\n            ((join(\"data\", \"subcontent\", \"statistics\", \"data.csv\"),), True),\n            ((join(\"data\", \"subcontent\", \"statistics\", \"data.csv.dvc\"),), False),\n            ((join(\"model\", \".gitignore\"),), False),\n            ((join(\"model\", \"people.csv\"),), True),\n            ((join(\"model\", \"people.csv.dvc\"),), False),\n            ((join(\"model\", \"script.py\"),), False),\n            ((join(\"model\", \"train.py\"),), False),\n            ((\"structure.xml\",), True),\n            ((\"structure.xml.dvc\",), False),\n        ),\n    )\n\n\ndef _simplify_tree(files):\n    ret = {}\n    for path, info in files.items():\n        if content := info.get(\"contents\"):\n            ret[path] = _simplify_tree(content)\n        else:\n            ret[path] = None\n    return ret\n\n\ndef test_ls_tree(tmp_dir, scm, dvc):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = ls_tree(os.fspath(tmp_dir), \"structure.xml\")\n    assert _simplify_tree(files) == {\"structure.xml\": None}\n\n    files = ls_tree(os.fspath(tmp_dir))\n\n    expected = {\n        \".\": {\n            \".dvcignore\": None,\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"data\": {\n                \"subcontent\": {\n                    \".gitignore\": None,\n                    \"data.xml\": None,\n                    \"data.xml.dvc\": None,\n      
              \"statistics\": {\n                        \".gitignore\": None,\n                        \"data.csv\": None,\n                        \"data.csv.dvc\": None,\n                    },\n                }\n            },\n            \"model\": {\n                \".gitignore\": None,\n                \"people.csv\": None,\n                \"people.csv.dvc\": None,\n                \"script.py\": None,\n                \"train.py\": None,\n            },\n            \"structure.xml\": None,\n            \"structure.xml.dvc\": None,\n        }\n    }\n    assert _simplify_tree(files) == expected\n\n    files = ls_tree(os.fspath(tmp_dir), \"model\")\n    assert _simplify_tree(files) == {\n        \"model\": {\n            \".gitignore\": None,\n            \"people.csv\": None,\n            \"people.csv.dvc\": None,\n            \"script.py\": None,\n            \"train.py\": None,\n        }\n    }\n\n\ndef test_ls_tree_dvc_only(tmp_dir, scm, dvc):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = ls_tree(os.fspath(tmp_dir), dvc_only=True)\n\n    expected = {\n        \".\": {\n            \"data\": {\n                \"subcontent\": {\"data.xml\": None, \"statistics\": {\"data.csv\": None}}\n            },\n            \"model\": {\"people.csv\": None},\n            \"structure.xml\": None,\n        }\n    }\n    assert _simplify_tree(files) == expected\n\n\ndef test_ls_tree_maxdepth(tmp_dir, scm, dvc):\n    tmp_dir.scm_gen(FS_STRUCTURE, commit=\"init\")\n    tmp_dir.dvc_gen(DVC_STRUCTURE, commit=\"dvc\")\n\n    files = ls_tree(os.fspath(tmp_dir), maxdepth=0)\n    assert _simplify_tree(files) == {\".\": None}\n\n    files = ls_tree(os.fspath(tmp_dir), maxdepth=1)\n    assert _simplify_tree(files) == {\n        \".\": {\n            \".dvcignore\": None,\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"data\": None,\n            \"model\": None,\n      
      \"structure.xml\": None,\n            \"structure.xml.dvc\": None,\n        }\n    }\n\n    files = ls_tree(os.fspath(tmp_dir), maxdepth=2)\n    assert _simplify_tree(files) == {\n        \".\": {\n            \".dvcignore\": None,\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"data\": {\"subcontent\": None},\n            \"model\": {\n                \".gitignore\": None,\n                \"people.csv\": None,\n                \"people.csv.dvc\": None,\n                \"script.py\": None,\n                \"train.py\": None,\n            },\n            \"structure.xml\": None,\n            \"structure.xml.dvc\": None,\n        }\n    }\n\n    files = ls_tree(os.fspath(tmp_dir), maxdepth=3)\n    assert _simplify_tree(files) == {\n        \".\": {\n            \".dvcignore\": None,\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"data\": {\n                \"subcontent\": {\n                    \".gitignore\": None,\n                    \"data.xml\": None,\n                    \"data.xml.dvc\": None,\n                    \"statistics\": None,\n                }\n            },\n            \"model\": {\n                \".gitignore\": None,\n                \"people.csv\": None,\n                \"people.csv.dvc\": None,\n                \"script.py\": None,\n                \"train.py\": None,\n            },\n            \"structure.xml\": None,\n            \"structure.xml.dvc\": None,\n        }\n    }\n\n    files = ls_tree(os.fspath(tmp_dir), maxdepth=4)\n    assert _simplify_tree(files) == {\n        \".\": {\n            \".dvcignore\": None,\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"data\": {\n                \"subcontent\": {\n                    \".gitignore\": None,\n                    \"data.xml\": None,\n                    \"data.xml.dvc\": None,\n                    \"statistics\": {\n                        \".gitignore\": 
None,\n                        \"data.csv\": None,\n                        \"data.csv.dvc\": None,\n                    },\n                }\n            },\n            \"model\": {\n                \".gitignore\": None,\n                \"people.csv\": None,\n                \"people.csv.dvc\": None,\n                \"script.py\": None,\n                \"train.py\": None,\n            },\n            \"structure.xml\": None,\n            \"structure.xml.dvc\": None,\n        }\n    }\n\n\ndef test_fs_ls_tree():\n    fs = MemoryFileSystem(global_store=False)\n    fs.pipe({f: content.encode() for f, content in FS_STRUCTURE.items()})\n    root = fs.root_marker\n\n    files = _ls_tree(fs, \"README.md\")\n    assert _simplify_tree(files) == {\"README.md\": None}\n    files = _ls_tree(fs, root)\n    expected = {\n        root: {\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"model\": {\n                \"script.py\": None,\n                \"train.py\": None,\n            },\n        }\n    }\n    assert _simplify_tree(files) == expected\n\n    files = _ls_tree(fs, \"model\")\n    assert _simplify_tree(files) == {\n        \"model\": {\n            \"script.py\": None,\n            \"train.py\": None,\n        }\n    }\n\n\ndef test_fs_ls_tree_maxdepth():\n    fs = MemoryFileSystem(global_store=False)\n    fs.pipe({f: content.encode() for f, content in FS_STRUCTURE.items()})\n\n    files = _ls_tree(fs, \"/\", maxdepth=0)\n    assert _simplify_tree(files) == {\"/\": None}\n\n    files = _ls_tree(fs, \"/\", maxdepth=1)\n    assert _simplify_tree(files) == {\n        \"/\": {\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"model\": None,\n        }\n    }\n\n    files = _ls_tree(fs, \"/\", maxdepth=2)\n    assert _simplify_tree(files) == {\n        \"/\": {\n            \".gitignore\": None,\n            \"README.md\": None,\n            \"model\": {\n                \"script.py\": None,\n        
        \"train.py\": None,\n            },\n        }\n    }\n\n    files = _ls_tree(fs, \"README.md\", maxdepth=3)\n    assert _simplify_tree(files) == {\"README.md\": None}\n"
  },
  {
    "path": "tests/func/test_ls_url.py",
    "content": "from dvc.config import Config\nfrom dvc.repo import Repo\nfrom dvc.testing.workspace_tests import TestLsUrl as _TestLsUrl\n\n\nclass TestLsUrl(_TestLsUrl):\n    pass\n\n\ndef test_ls_url_config(dvc, make_remote):\n    remote_path = make_remote(\"myremote\", default=False, typ=\"local\")\n    (remote_path / \"foo\").write_text(\"foo\")\n    (remote_path / \"bar\").write_text(\"bar\")\n\n    actual = sorted(\n        Repo.ls_url(\"remote://myremote\", config=Config.from_cwd()),\n        key=lambda entry: entry[\"path\"],\n    )\n    expected = [\n        {\"isdir\": False, \"path\": \"bar\", \"size\": 3},\n        {\"isdir\": False, \"path\": \"foo\", \"size\": 3},\n    ]\n    assert actual == expected\n"
  },
  {
    "path": "tests/func/test_merge_driver.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.utils.fs import remove\n\n\ndef _gen(tmp_dir, struct, name):\n    remove(tmp_dir / \"data\")\n    if struct is None:\n        (tmp_dir / name).touch()\n    else:\n        (stage,) = tmp_dir.dvc_gen({\"data\": struct})\n        os.rename(stage.path, name)\n\n\n@pytest.mark.parametrize(\n    \"ancestor, our, their, merged\",\n    [\n        (\n            {\"foo\": \"foo\"},\n            {\"foo\": \"foo\", \"bar\": \"bar\"},\n            {\"foo\": \"foo\", \"baz\": \"baz\"},\n            {\"foo\": \"foo\", \"bar\": \"bar\", \"baz\": \"baz\"},\n        ),\n        (\n            {\"common\": \"common\", \"subdir\": {\"foo\": \"foo\"}},\n            {\"common\": \"common\", \"subdir\": {\"foo\": \"foo\", \"bar\": \"bar\"}},\n            {\"common\": \"common\", \"subdir\": {\"foo\": \"foo\", \"baz\": \"baz\"}},\n            {\n                \"common\": \"common\",\n                \"subdir\": {\"foo\": \"foo\", \"bar\": \"bar\", \"baz\": \"baz\"},\n            },\n        ),\n        ({}, {\"foo\": \"foo\"}, {\"bar\": \"bar\"}, {\"foo\": \"foo\", \"bar\": \"bar\"}),\n        ({}, {}, {\"bar\": \"bar\"}, {\"bar\": \"bar\"}),\n        ({}, {\"foo\": \"foo\"}, {}, {\"foo\": \"foo\"}),\n        (None, {\"foo\": \"foo\"}, {\"bar\": \"bar\"}, {\"foo\": \"foo\", \"bar\": \"bar\"}),\n        (None, None, {\"bar\": \"bar\"}, {\"bar\": \"bar\"}),\n        (None, {\"foo\": \"foo\"}, None, {\"foo\": \"foo\"}),\n        (\n            {\"foo\": \"foo\"},\n            {\"foo\": \"bar\"},\n            {\"foo\": \"foo\", \"baz\": \"baz\"},\n            {\"foo\": \"bar\", \"baz\": \"baz\"},\n        ),\n        ({\"foo\": \"foo\"}, {}, {\"foo\": \"foo\", \"bar\": \"bar\"}, {\"bar\": \"bar\"}),\n        (\n            {\"common\": \"common\", \"subdir\": {\"foo\": \"foo\", \"bar\": \"bar\"}},\n            {\"common\": \"common\", \"subdir\": {\"foo\": \"foo\", \"bar\": \"baz\"}},\n            {\"common\": 
\"common\", \"subdir\": {\"bar\": \"bar\", \"bizz\": \"bizz\"}},\n            {\n                \"common\": \"common\",\n                \"subdir\": {\"bar\": \"baz\", \"bizz\": \"bizz\"},\n            },\n        ),\n    ],\n)\ndef test_merge(tmp_dir, dvc, ancestor, our, their, merged):\n    _gen(tmp_dir, ancestor, \"ancestor\")\n    _gen(tmp_dir, our, \"our\")\n    _gen(tmp_dir, their, \"their\")\n\n    assert (\n        main(\n            [\n                \"git-hook\",\n                \"merge-driver\",\n                \"--ancestor\",\n                \"ancestor\",\n                \"--our\",\n                \"our\",\n                \"--their\",\n                \"their\",\n            ]\n        )\n        == 0\n    )\n\n    _gen(tmp_dir, merged, \"merged\")\n\n    assert (tmp_dir / \"our\").read_text() == (tmp_dir / \"merged\").read_text()\n\n\n@pytest.mark.parametrize(\n    \"ancestor, our, their, error\",\n    [\n        (\n            {\"foo\": \"foo\"},\n            {\"foo\": \"bar\"},\n            {\"foo\": \"baz\"},\n            \"unable to auto-merge the following paths:\\nfoo\",\n        ),\n        (\n            {\"common\": \"common\", \"foo\": \"foo\"},\n            {\"common\": \"common\", \"bar\": \"bar\"},\n            {\"baz\": \"baz\"},\n            \"unable to auto-merge the following paths:\\nboth deleted: ('foo',)\",\n        ),\n    ],\n)\ndef test_merge_conflict(tmp_dir, dvc, ancestor, our, their, error, caplog):\n    _gen(tmp_dir, ancestor, \"ancestor\")\n    _gen(tmp_dir, our, \"our\")\n    _gen(tmp_dir, their, \"their\")\n\n    assert (\n        main(\n            [\n                \"git-hook\",\n                \"merge-driver\",\n                \"--ancestor\",\n                \"ancestor\",\n                \"--our\",\n                \"our\",\n                \"--their\",\n                \"their\",\n            ]\n        )\n        != 0\n    )\n\n    assert error in caplog.text\n\n\ndef 
test_merge_different_output_options(tmp_dir, dvc, caplog):\n    (tmp_dir / \"ancestor\").touch()\n\n    (tmp_dir / \"our\").write_text(\n        \"outs:\\n- md5: f123456789.dir\\n  hash: md5\\n  path: path\\n\"\n    )\n\n    (tmp_dir / \"their\").write_text(\n        r\"outs:\"\n        \"\\n\"\n        \"- md5: f987654321.dir\\n\"\n        \"  hash: md5\\n\"\n        \"  path: path\\n\"\n        \"  cache: false\\n\"\n    )\n\n    assert (\n        main(\n            [\n                \"git-hook\",\n                \"merge-driver\",\n                \"--ancestor\",\n                \"ancestor\",\n                \"--our\",\n                \"our\",\n                \"--their\",\n                \"their\",\n            ]\n        )\n        != 0\n    )\n\n    error = \"unable to auto-merge outputs with different options\"\n    assert error in caplog.text\n\n\ndef test_merge_file(tmp_dir, dvc, caplog):\n    (tmp_dir / \"ancestor\").touch()\n\n    (tmp_dir / \"our\").write_text(\n        \"outs:\\n- md5: f123456789.dir\\n  hash: md5\\n  path: path\\n\"\n    )\n\n    (tmp_dir / \"their\").write_text(\n        \"outs:\\n- md5: f987654321\\n  hash: md5\\n  path: path\\n\"\n    )\n\n    assert (\n        main(\n            [\n                \"git-hook\",\n                \"merge-driver\",\n                \"--ancestor\",\n                \"ancestor\",\n                \"--our\",\n                \"our\",\n                \"--their\",\n                \"their\",\n            ]\n        )\n        != 0\n    )\n\n    err = \"unable to auto-merge outputs that are not directories\"\n    assert err in caplog.text\n\n\ndef test_merge_non_dvc_add(tmp_dir, dvc, caplog):\n    (tmp_dir / \"ancestor\").touch()\n\n    (tmp_dir / \"our\").write_text(\n        \"outs:\\n\"\n        \"- md5: f123456789.dir\\n\"\n        \"  hash: md5\\n\"\n        \"  path: path\\n\"\n        \"- md5: ff123456789.dir\\n\"\n        \"  hash: md5\\n\"\n        \"  path: another\\n\"\n    )\n\n    
(tmp_dir / \"their\").write_text(\n        \"outs:\\n- md5: f987654321\\n  hash: md5\\n  path: path\\n\"\n    )\n\n    assert (\n        main(\n            [\n                \"git-hook\",\n                \"merge-driver\",\n                \"--ancestor\",\n                \"ancestor\",\n                \"--our\",\n                \"our\",\n                \"--their\",\n                \"their\",\n            ]\n        )\n        != 0\n    )\n\n    error = \"unable to auto-merge DVC files that weren't created by `dvc add`\"\n    assert error in caplog.text\n"
  },
  {
    "path": "tests/func/test_move.py",
    "content": "import os\nimport shutil\nimport textwrap\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.exceptions import MoveNotDataSourceError, OutputNotFoundError\nfrom dvc.stage.exceptions import StageFileAlreadyExistsError\n\n\ndef test_move(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\"foo\", \"bar\")\n    assert (tmp_dir / \"foo.dvc\").exists()\n    dvc.move(\"foo\", \"bar\")\n\n    assert not (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / \"bar.dvc\").exists()\n    assert not (tmp_dir / \"foo\").is_file()\n    assert (tmp_dir / \"bar\").is_file()\n    # should only have the new path in the .gitignore, and only once\n    assert (tmp_dir / \".gitignore\").read_text().splitlines() == [\"/bar\"]\n\n\ndef test_move_non_existent_file(dvc):\n    with pytest.raises(OutputNotFoundError):\n        dvc.move(\"non_existent_file\", \"dst\")\n\n\ndef test_move_missing_file(tmp_dir, dvc, scm, caplog):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    (tmp_dir / \"foo\").unlink()\n    contents = (tmp_dir / \"foo.dvc\").parse()\n    dvc.move(\"foo\", \"bar\")\n\n    assert not (tmp_dir / \"foo.dvc\").exists()\n    # only the path should be changed in the dvc file\n    contents[\"outs\"][0][\"path\"] = \"bar\"\n    assert contents == (tmp_dir / \"bar.dvc\").parse()\n\n    # file should not be checked out\n    assert not (tmp_dir / \"foo\").is_file()\n    assert not (tmp_dir / \"bar\").is_file()\n    # should only have the new path in the .gitignore, and only once\n    assert (tmp_dir / \".gitignore\").read_text().splitlines() == [\"/bar\"]\n\n\ndef test_move_directory(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"data\", {\"foo\": \"foo\", \"bar\": \"bar\"})\n    dvc.move(\"data\", \"dst\")\n    assert not (tmp_dir / \"data\").is_dir()\n    assert (tmp_dir / \"dst\").is_dir()\n\n\ndef test_cmd_move(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert main([\"move\", \"foo\", \"foo1\"]) == 0\n    assert main([\"move\", \"non-existing-file\", \"dst\"]) != 
0\n\n\ndef test_move_not_data_source(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.run(cmd=\"cp foo file1\", outs=[\"file1\"], deps=[\"foo\"], name=\"copy-foo-file1\")\n\n    with pytest.raises(MoveNotDataSourceError):\n        dvc.move(\"file1\", \"dst\")\n\n    assert main([\"move\", \"file1\", \"dst\"]) != 0\n    assert (tmp_dir / \"file1\").exists()\n\n\ndef test_move_file_with_extension(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"file.csv\", \"1,2,3\\n\")\n\n    assert main([\"move\", \"file.csv\", \"other_name.csv\"]) == 0\n    assert not (tmp_dir / \"file.csv\").exists()\n    assert not (tmp_dir / \"file.csv.dvc\").exists()\n    assert (tmp_dir / \"other_name.csv\").exists()\n    assert (tmp_dir / \"other_name.csv.dvc\").exists()\n\n\ndef test_move_file_to_directory(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.gen({\"data\": {\"bar\": \"bar\"}})\n\n    assert main([\"move\", \"foo\", os.path.join(\"data\", \"foo\")]) == 0\n    assert not (tmp_dir / \"foo\").exists()\n    assert not (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / \"data\" / \"foo\").exists()\n    assert (tmp_dir / \"data\" / \"foo.dvc\").exists()\n\n\ndef test_move_file_to_directory_without_specified_target_name(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    tmp_dir.gen({\"data\": {\"bar\": \"bar\"}})\n\n    assert main([\"move\", \"foo\", \"data\"]) == 0\n    assert not (tmp_dir / \"foo\").exists()\n    assert not (tmp_dir / \"foo.dvc\").exists()\n    assert (tmp_dir / \"data\" / \"foo\").exists()\n    assert (tmp_dir / \"data\" / \"foo.dvc\").exists()\n\n    new_stage = (tmp_dir / \"data\" / \"foo.dvc\").load_yaml()\n    assert new_stage[\"outs\"][0][\"path\"] == \"foo\"\n\n\ndef test_move_directory_should_not_overwrite_existing(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\"}})\n    new_dir = tmp_dir / \"dir\"\n    new_dir.mkdir()\n\n    dvc.move(\"data\", \"dir\")\n    assert not (tmp_dir / \"data\").exists()\n    
assert not (tmp_dir / \"data.dvc\").exists()\n    assert set(new_dir.iterdir()) == {\n        new_dir / \".gitignore\",\n        new_dir / \"data.dvc\",\n        new_dir / \"data\",\n    }\n    assert set((new_dir / \"data\").iterdir()) == {new_dir / \"data\" / \"foo\"}\n\n\ndef test_move_file_between_directories(tmp_dir, dvc):\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\"}})\n    dvc.add(os.path.join(\"data\", \"foo\"))\n\n    (tmp_dir / \"data2\").mkdir()\n\n    assert main([\"move\", os.path.join(\"data\", \"foo\"), \"data2\"]) == 0\n    assert not (tmp_dir / \"data\" / \"foo\").exists()\n    assert not (tmp_dir / \"data\" / \"foo.dvc\").exists()\n    assert (tmp_dir / \"data2\" / \"foo\").exists()\n    assert (tmp_dir / \"data2\" / \"foo.dvc\").exists()\n\n    d = (tmp_dir / \"data2\" / \"foo.dvc\").load_yaml()\n    assert d[\"outs\"][0][\"path\"] == \"foo\"\n\n\ndef test_move_file_inside_directory(tmp_dir, dvc):\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\"}})\n    file = tmp_dir / \"data\" / \"foo\"\n    dvc.add(file.fs_path)\n\n    with (tmp_dir / \"data\").chdir():\n        assert main([\"move\", \"foo\", \"data.txt\"]) == 0\n\n    assert not file.exists()\n    assert (tmp_dir / \"data\" / \"data.txt\").exists()\n    assert (tmp_dir / \"data\" / \"data.txt.dvc\").exists()\n\n\ndef test_move_should_save_stage_info(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"old_name\": {\"file1\": \"file1\"}})\n\n    dvc.move(\"old_name\", \"new_name\")\n\n    assert dvc.status() == {}\n\n\ndef test_should_move_to_dir_on_non_default_stage_file(tmp_dir, dvc):\n    tmp_dir.gen({\"file\": \"file_content\"})\n\n    dvc.add(\"file\")\n    shutil.move(\"file.dvc\", \"stage.dvc\")\n    os.mkdir(\"directory\")\n\n    dvc.move(\"file\", \"directory\")\n\n    assert os.path.exists(os.path.join(\"directory\", \"file\"))\n\n\ndef test_move_gitignored(tmp_dir, scm, dvc):\n    from dvc.dvcfile import FileIsGitIgnored\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\"})\n\n    os.mkdir(\"dir\")\n    
(tmp_dir / \"dir\").gen(\".gitignore\", \"*\")\n\n    with pytest.raises(FileIsGitIgnored):\n        dvc.move(\"foo\", \"dir\")\n\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"foo.dvc\").exists()\n    assert not (tmp_dir / \"dir\" / \"foo\").exists()\n    assert not (tmp_dir / \"dir\" / \"foo.dvc\").exists()\n\n\ndef test_move_output_overlap(tmp_dir, dvc):\n    from dvc.exceptions import OverlappingOutputPathsError\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"dir\": {\"bar\": \"bar\"}})\n\n    with pytest.raises(OverlappingOutputPathsError):\n        dvc.move(\"foo\", \"dir\")\n\n    assert (tmp_dir / \"foo\").read_text() == \"foo\"\n    assert (tmp_dir / \"foo.dvc\").exists()\n    assert not (tmp_dir / \"dir\" / \"foo\").exists()\n    assert not (tmp_dir / \"dir\" / \"foo.dvc\").exists()\n\n\ndef test_move_meta(tmp_dir, dvc):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    data = (tmp_dir / stage.path).parse()\n    data[\"meta\"] = {\"custom_key\": 42}\n    (tmp_dir / stage.path).dump(data)\n\n    dvc.move(\"foo\", \"bar\")\n    res = (tmp_dir / \"bar.dvc\").read_text()\n    assert res == textwrap.dedent(\n        \"\"\"\\\n        outs:\n        - md5: acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n          hash: md5\n          path: bar\n        meta:\n          custom_key: 42\n    \"\"\"\n    )\n\n\ndef test_import(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\"foo\", \"foo\", commit=\"add foo\")\n    imp_stage = dvc.imp(os.curdir, \"foo\", \"foo_imported\")\n\n    dvc.move(\"foo_imported\", \"foo_moved\")\n\n    (stage,) = dvc.stage.collect(\"foo_moved.dvc\")\n    assert imp_stage.md5 != stage.md5\n    res = (tmp_dir / \"foo_moved.dvc\").read_text()\n    assert res == textwrap.dedent(\n        f\"\"\"\\\n        md5: {stage.md5}\n        frozen: true\n        deps:\n        - path: foo\n          repo:\n            url: {os.curdir}\n            rev_lock: {scm.get_rev()}\n        outs:\n        - md5: 
acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n          hash: md5\n          path: foo_moved\n    \"\"\"\n    )\n\n\n@pytest.mark.parametrize(\n    \"path_func\",\n    [pytest.param(os.path.abspath, id=\"abs\"), pytest.param(os.path.relpath, id=\"rel\")],\n)\ndef test_import_url_in_repo(tmp_dir, dvc, path_func):\n    tmp_dir.gen(\"foo\", \"foo\")\n    imp_stage = dvc.imp_url(path_func(tmp_dir / \"foo\"), \"foo_imported\")\n    (tmp_dir / \"data\").mkdir()\n\n    dvc.move(\"foo_imported\", os.path.join(\"data\", \"foo_moved\"))\n\n    (stage,) = dvc.stage.collect(os.path.join(\"data\", \"foo_moved.dvc\"))\n    assert imp_stage.md5 != stage.md5\n    res = (tmp_dir / \"data\" / \"foo_moved.dvc\").read_text()\n    assert res == textwrap.dedent(\n        f\"\"\"\\\n        md5: {stage.md5}\n        frozen: true\n        deps:\n        - md5: acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n          hash: md5\n          path: ../foo\n        outs:\n        - md5: acbd18db4cc2f85cedef654fccc4a4d8\n          size: 3\n          hash: md5\n          path: foo_moved\n    \"\"\"\n    )\n\n\n@pytest.mark.parametrize(\n    \"path_func\",\n    [pytest.param(os.path.abspath, id=\"abs\"), pytest.param(os.path.relpath, id=\"rel\")],\n)\ndef test_import_url_out_of_repo(tmp_dir, dvc, scm, path_func, make_tmp_dir):\n    external = make_tmp_dir(\"external\")\n    external.gen(\"foo\", \"foo\")\n\n    imp_stage = dvc.imp_url(path_func(external / \"foo\"), \"foo_imported\")\n\n    data_dir = tmp_dir / \"data\"\n    data_dir.mkdir()\n\n    new_path = data_dir / \"foo_moved\"\n    new_dvcfile = new_path.with_suffix(\".dvc\")\n    dvc.move(\"foo_imported\", os.fspath(new_path))\n\n    (stage,) = dvc.stage.collect(os.fspath(new_dvcfile))\n    assert imp_stage.md5 != stage.md5\n\n    with data_dir.chdir():\n        expected_path = path_func(external / \"foo\")\n\n    assert new_dvcfile.parse() == {\n        \"md5\": stage.md5,\n        \"frozen\": True,\n        \"deps\": [\n       
     {\n                \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"size\": 3,\n                \"hash\": \"md5\",\n                \"path\": expected_path,\n            }\n        ],\n        \"outs\": [\n            {\n                \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"size\": 3,\n                \"hash\": \"md5\",\n                \"path\": \"foo_moved\",\n            }\n        ],\n    }\n\n\n@pytest.mark.parametrize(\n    \"path_func\",\n    [pytest.param(os.path.abspath, id=\"abs\"), pytest.param(os.path.relpath, id=\"rel\")],\n)\ndef test_all_metadata_are_preserved(tmp_dir, dvc, make_tmp_dir, path_func):\n    external = make_tmp_dir(\"external\")\n    external.gen(\"foo\", \"foo\")\n\n    contents = {\n        \"md5\": \"bad\",  # placeholder, does not matter for the test\n        \"frozen\": True,\n        \"desc\": \"this is a stage description\",\n        \"always_changed\": True,\n        \"meta\": {\"custom_key\": 42},\n        \"deps\": [\n            {\n                \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"size\": 3,\n                \"hash\": \"md5\",\n                \"path\": path_func(external / \"foo\"),\n            }\n        ],\n        \"outs\": [\n            {\n                \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n                \"path\": \"foo_imported\",\n                \"persist\": True,\n                \"hash\": \"md5\",\n                \"size\": 3,\n                \"desc\": \"this is a description\",\n                \"type\": \"model\",\n                \"labels\": [\"label1\", \"label2\"],\n                \"meta\": {\"custom_key\": 42},\n                \"cache\": False,\n                \"remote\": \"myremote\",\n                \"push\": False,\n            }\n        ],\n    }\n    (tmp_dir / \"foo_imported.dvc\").dump(contents)\n    (tmp_dir / \"foo_imported\").write_text(\"foo\")\n\n    data_dir = tmp_dir / \"data\"\n    
data_dir.mkdir()\n\n    new_path = data_dir / \"foo_moved\"\n    new_dvcfile = new_path.with_suffix(\".dvc\")\n    dvc.move(\"foo_imported\", os.fspath(new_path))\n\n    (stage,) = dvc.stage.collect(os.fspath(new_dvcfile))\n\n    with data_dir.chdir():\n        expected_path = path_func(external / \"foo\")\n\n    contents[\"outs\"][0] |= {\"path\": \"foo_moved\"}\n    contents[\"deps\"][0] |= {\"path\": expected_path}\n    contents |= {\"md5\": stage.md5}\n    assert new_dvcfile.parse() == contents\n\n\ndef test_move_dst_stage_file_already_exists(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n\n    with pytest.raises(StageFileAlreadyExistsError) as exc_info:\n        dvc.move(\"foo\", \"bar\")\n    assert str(exc_info.value) == \"'bar.dvc' already exists\"\n    assert exc_info.value.__cause__ is None\n"
  },
  {
    "path": "tests/func/test_odb.py",
    "content": "import os\nimport stat\n\nimport configobj\nimport pytest\n\nfrom dvc.cachemgr import CacheManager\nfrom dvc.cli import main\nfrom dvc.utils import relpath\nfrom dvc_data.hashfile.hash_info import HashInfo\nfrom dvc_objects.errors import ObjectFormatError\n\n\ndef test_cache(tmp_dir, dvc):\n    cache1_md5 = \"123\"\n    cache2_md5 = \"234\"\n    cache1 = os.path.join(dvc.cache.local.path, cache1_md5[:2], cache1_md5[2:])\n    cache2 = os.path.join(dvc.cache.local.path, cache2_md5[:2], cache2_md5[2:])\n    tmp_dir.gen({cache1: \"1\", cache2: \"2\"})\n\n    assert os.path.exists(cache1)\n    assert os.path.exists(cache2)\n\n    odb = CacheManager(dvc)\n\n    md5_list = list(odb.local.all())\n    assert len(md5_list) == 2\n    assert cache1_md5 in md5_list\n    assert cache2_md5 in md5_list\n\n    odb_cache1 = odb.local.oid_to_path(cache1_md5)\n    odb_cache2 = odb.local.oid_to_path(cache2_md5)\n    assert os.fspath(odb_cache1) == cache1\n    assert os.fspath(odb_cache2) == cache2\n\n\ndef test_cache_load_bad_dir_cache(tmp_dir, dvc):\n    from dvc_data.hashfile import load\n\n    dir_hash = \"123.dir\"\n    fname = os.fspath(dvc.cache.local.oid_to_path(dir_hash))\n    tmp_dir.gen({fname: \"<clearly>not,json\"})\n    with pytest.raises(ObjectFormatError):\n        load(dvc.cache.local, HashInfo(\"md5\", dir_hash))\n\n    dir_hash = \"234.dir\"\n    fname = os.fspath(dvc.cache.local.oid_to_path(dir_hash))\n    tmp_dir.gen({fname: '{\"a\": \"b\"}'})\n    with pytest.raises(ObjectFormatError):\n        load(dvc.cache.local, HashInfo(\"md5\", dir_hash))\n\n\ndef test_external_cache_dir(tmp_dir, dvc, make_tmp_dir):\n    cache_dir = make_tmp_dir(\"cache\")\n\n    with dvc.config.edit() as conf:\n        conf[\"cache\"][\"dir\"] = cache_dir.fs_path\n    assert not os.path.exists(dvc.cache.local.path)\n    dvc.cache = CacheManager(dvc)\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\"})\n\n    tmp_dir.dvc_gen(\n        {\n            \"data_dir\": {\n                
\"data\": \"data_dir/data\",\n                \"data_sub_dir\": {\"data_sub\": \"data_dir/data_sub_dir/data_sub\"},\n            }\n        }\n    )\n\n    assert not os.path.exists(\".dvc/cache\")\n    assert len(os.listdir(cache_dir)) != 0\n\n\ndef test_shared_cache_dir(tmp_dir):\n    cache_dir = os.path.abspath(os.path.join(os.curdir, \"cache\"))\n    for d in [\"dir1\", \"dir2\"]:\n        os.mkdir(d)\n        with (tmp_dir / d).chdir():\n            ret = main([\"init\", \"--no-scm\"])\n            assert ret == 0\n\n            ret = main([\"config\", \"cache.dir\", cache_dir])\n            assert ret == 0\n\n            assert not os.path.exists(os.path.join(\".dvc\", \"cache\"))\n\n            (tmp_dir / d).gen({\"common\": \"common\", \"unique\": d})\n\n            ret = main([\"add\", \"common\", \"unique\"])\n            assert ret == 0\n\n    assert not os.path.exists(os.path.join(\"dir1\", \".dvc\", \"cache\"))\n    assert not os.path.exists(os.path.join(\"dir2\", \".dvc\", \"cache\"))\n    assert os.path.exists(\n        os.path.join(cache_dir, \"files\", \"md5\", \"dc\", \"f6c2fa538b445a3a095255c3641dfc\")\n    )\n    assert os.path.exists(\n        os.path.join(cache_dir, \"files\", \"md5\", \"b4\", \"333c8cfa2ebba7ef20ec6c3265902b\")\n    )\n    assert os.path.exists(\n        os.path.join(cache_dir, \"files\", \"md5\", \"9e\", \"fab2399c7c560b34de477b9aa0a465\")\n    )\n\n\ndef test_cache_link_type(tmp_dir, scm, dvc):\n    with dvc.config.edit() as conf:\n        conf[\"cache\"][\"type\"] = \"reflink,copy\"\n    dvc.cache = CacheManager(dvc)\n\n    stages = tmp_dir.dvc_gen({\"foo\": \"foo\"})\n    assert len(stages) == 1\n    assert (tmp_dir / \"foo\").read_text().strip() == \"foo\"\n\n\ndef test_cmd_cache_dir(tmp_dir, scm, dvc):\n    ret = main([\"cache\", \"dir\"])\n    assert ret == 0\n\n\ndef test_cmd_cache_abs_path(tmp_dir, scm, dvc, make_tmp_dir):\n    cache_dir = make_tmp_dir(\"cache\")\n    ret = main([\"cache\", \"dir\", 
cache_dir.fs_path])\n    assert ret == 0\n\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert config[\"cache\"][\"dir\"] == cache_dir.fs_path\n\n\ndef test_cmd_cache_relative_path(tmp_dir, scm, dvc, make_tmp_dir):\n    cache_dir = make_tmp_dir(\"cache\")\n    dname = relpath(cache_dir)\n    ret = main([\"cache\", \"dir\", dname])\n    assert ret == 0\n\n    dvc.config.load()\n    dvc.cache = CacheManager(dvc)\n\n    # NOTE: we are in the repo's root and config is in .dvc/, so\n    # dir path written to config should be just one level above.\n    rel = os.path.join(\"..\", dname)\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert config[\"cache\"][\"dir\"] == rel.replace(\"\\\\\", \"/\")\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\"})\n\n    assert os.path.exists(\n        os.path.join(cache_dir, \"files\", \"md5\", \"ac\", \"bd18db4cc2f85cedef654fccc4a4d8\")\n    )\n\n\ndef test_default_cache_type(dvc):\n    assert dvc.cache.local.cache_types == [\"reflink\", \"copy\"]\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"Not supported for Windows.\")\n@pytest.mark.parametrize(\"group\", [False, True])\ndef test_shared_cache(tmp_dir, dvc, group):\n    from dvc_data.hashfile.db.local import umask\n\n    if group:\n        with dvc.config.edit() as conf:\n            conf[\"cache\"].update({\"shared\": \"group\"})\n    dvc.cache = CacheManager(dvc)\n    cache_dir = dvc.cache.local.path\n\n    assert not os.path.exists(cache_dir)\n\n    tmp_dir.dvc_gen({\"file\": \"file content\", \"dir\": {\"file2\": \"file 2 content\"}})\n\n    file_mode = oct(0o444)\n    dir_mode = oct(0o2775 if group else (0o777 & ~umask))\n    for root, dnames, fnames in os.walk(cache_dir):\n        for dname in dnames:\n            path = os.path.join(root, dname)\n            assert oct(stat.S_IMODE(os.stat(path).st_mode)) == dir_mode\n\n        for fname in fnames:\n            path = os.path.join(root, fname)\n            assert 
oct(stat.S_IMODE(os.stat(path).st_mode)) == file_mode\n"
  },
  {
    "path": "tests/func/test_remote.py",
    "content": "import errno\nimport itertools\nimport os\nimport stat\n\nimport configobj\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.config import Config\nfrom dvc.exceptions import DownloadError, UploadError\nfrom dvc.utils.fs import remove\n\n\ndef test_remote(dvc):\n    remotes = [\"a\", \"b\", \"c\"]\n\n    assert main([\"remote\", \"list\"]) == 0\n    assert main([\"remote\", \"remove\", remotes[0]]) != 0\n\n    for r in remotes:\n        assert main([\"remote\", \"add\", \"--default\", r, \"s3://bucket/name\"]) == 0\n\n    assert main([\"remote\", \"list\"]) == 0\n\n    assert main([\"remote\", \"modify\", remotes[0], \"checksum_jobs\", \"1\"]) == 0\n    assert main([\"remote\", \"remove\", remotes[0]]) == 0\n\n    assert main([\"remote\", \"list\"]) == 0\n\n\ndef test_remote_add_relative_path(dvc):\n    dname = os.path.join(\"..\", \"path\", \"to\", \"dir\")\n    ret = main([\"remote\", \"add\", \"mylocal\", dname])\n    assert ret == 0\n\n    # NOTE: we are in the repo's root and config is in .dvc/, so\n    # dir path written to config should be just one level above.\n    rel = os.path.join(\"..\", dname)\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert config['remote \"mylocal\"'][\"url\"] == rel.replace(\"\\\\\", \"/\")\n\n\ndef test_remote_overwrite(dvc):\n    remote_name = \"a\"\n    remote_url = \"s3://bucket/name\"\n    assert main([\"remote\", \"add\", remote_name, remote_url]) == 0\n    assert main([\"remote\", \"add\", remote_name, remote_url]) == 251\n    assert main([\"remote\", \"add\", \"-f\", remote_name, remote_url]) == 0\n\n\ndef test_referencing_other_remotes(dvc):\n    assert main([\"remote\", \"add\", \"foo\", \"ssh://localhost/\"]) == 0\n    assert main([\"remote\", \"add\", \"bar\", \"remote://foo/dvc-storage\"]) == 0\n\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert config['remote \"bar\"'][\"url\"] == \"remote://foo/dvc-storage\"\n\n\ndef test_remove_default(tmp_dir, 
dvc):\n    remote = \"mys3\"\n    assert main([\"remote\", \"add\", \"--default\", remote, \"s3://bucket/name\"]) == 0\n    assert main([\"remote\", \"modify\", remote, \"profile\", \"default\"]) == 0\n    assert main([\"config\", \"--local\", \"core.remote\", remote]) == 0\n\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    local_config = configobj.ConfigObj(dvc.config.files[\"local\"])\n    assert config[\"core\"][\"remote\"] == remote\n    assert local_config[\"core\"][\"remote\"] == remote\n\n    assert main([\"remote\", \"remove\", remote]) == 0\n\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    local_config = configobj.ConfigObj(dvc.config.files[\"local\"])\n    assert config.get(\"core\", {}).get(\"remote\") is None\n    assert local_config.get(\"core\", {}).get(\"remote\") is None\n\n\ndef test_remote_remove(dvc):\n    ret = main([\"config\", \"core.checksum_jobs\", \"1\"])\n    assert ret == 0\n\n    remote = \"mys3\"\n    ret = main([\"remote\", \"add\", remote, \"s3://bucket/name\"])\n    assert ret == 0\n\n    ret = main([\"remote\", \"remove\", remote])\n    assert ret == 0\n\n\ndef test_remote_default_cmd(dvc):\n    remote = \"mys3\"\n    ret = main([\"remote\", \"add\", \"mys3\", \"s3://bucket/path\"])\n    assert ret == 0\n\n    ret = main([\"remote\", \"default\", \"mys3\"])\n    assert ret == 0\n    config_file = os.path.join(dvc.dvc_dir, Config.CONFIG)\n    config = configobj.ConfigObj(config_file)\n    default = config[\"core\"][\"remote\"]\n    assert default == remote\n\n    ret = main([\"remote\", \"default\", \"--unset\"])\n    assert ret == 0\n    config = configobj.ConfigObj(config_file)\n    default = config.get(\"core\", {}).get(\"remote\")\n    assert default is None\n\n\ndef test_show_default(dvc, capsys):\n    assert main([\"remote\", \"add\", \"foo\", \"s3://bucket/name\"]) == 0\n    assert main([\"remote\", \"default\", \"foo\"]) == 0\n    assert main([\"remote\", \"default\"]) == 0\n    out, _ = 
capsys.readouterr()\n    assert out == \"foo\\n\"\n\n\ndef test_list_shows_default(dvc, capsys):\n    default_remote = \"foo\"\n    other_remote = \"bar\"\n    bucket_url = \"s3://bucket/name\"\n    assert main([\"remote\", \"add\", default_remote, bucket_url]) == 0\n    assert main([\"remote\", \"add\", other_remote, bucket_url]) == 0\n    assert main([\"remote\", \"default\", default_remote]) == 0\n    assert main([\"remote\", \"list\"]) == 0\n    out, _ = capsys.readouterr()\n    out_lines = out.splitlines()\n    assert out_lines[0].split() == [default_remote, bucket_url, \"(default)\"]\n    assert out_lines[1].split() == [other_remote, bucket_url]\n\n\ndef test_upper_case_remote(tmp_dir, dvc, local_cloud):\n    remote_name = \"UPPERCASEREMOTE\"\n\n    tmp_dir.gen(\"foo\", \"foo\")\n\n    ret = main([\"remote\", \"add\", remote_name, local_cloud.url])\n    assert ret == 0\n\n    ret = main([\"push\", \"-r\", remote_name])\n    assert ret == 0\n\n\ndef test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc, mocker):\n    from dvc_data.hashfile.build import build\n    from dvc_data.hashfile.tree import Tree\n\n    tmp_dir.gen({\"data\": {\"1\": \"1 content\", \"2\": \"2 content\"}})\n\n    path = (tmp_dir / \"data\").fs_path\n\n    tree = Tree.from_list([{\"relpath\": \"1\", \"md5\": \"1\"}, {\"relpath\": \"2\", \"md5\": \"2\"}])\n    tree.digest()\n\n    mocker.patch(\"dvc_data.hashfile.build._build_tree\", return_value=(None, tree))\n\n    _, _, obj = build(dvc.cache.local, path, dvc.cache.local.fs, \"md5\")\n    hash1 = obj.hash_info\n\n    # remove the raw dir obj to force building the tree on the next build call\n    dvc.cache.local.fs.remove(dvc.cache.local.oid_to_path(hash1.as_raw().value))\n\n    tree = Tree.from_list([{\"md5\": \"1\", \"relpath\": \"1\"}, {\"md5\": \"2\", \"relpath\": \"2\"}])\n    tree.digest()\n\n    # NOTE: _build_tree already patched above\n    _, _, obj = build(dvc.cache.local, path, dvc.cache.local.fs, \"md5\")\n    hash2 = 
obj.hash_info\n\n    assert hash1 == hash2\n\n\ndef test_partial_push_n_pull(  # noqa: C901\n    tmp_dir, dvc, tmp_path_factory, local_remote, mocker\n):\n    from dvc_objects.fs import generic\n\n    foo = tmp_dir.dvc_gen({\"foo\": \"foo content\"})[0].outs[0]\n    bar = tmp_dir.dvc_gen({\"bar\": \"bar content\"})[0].outs[0]\n    baz = tmp_dir.dvc_gen({\"baz\": {\"foo\": \"foo content\"}})[0].outs[0]\n\n    # Faulty upload version, failing on foo\n    original = generic.transfer\n    odb = dvc.cloud.get_remote_odb(\"upstream\")\n\n    def unreliable_upload(from_fs, from_info, to_fs, to_info, **kwargs):\n        on_error = kwargs[\"on_error\"]\n        assert on_error\n        if isinstance(from_info, str):\n            from_info = [from_info]\n        else:\n            from_info = list(from_info)\n        if isinstance(to_info, str):\n            to_info = [to_info]\n        else:\n            to_info = list(to_info)\n        for i in range(len(from_info) - 1, -1, -1):\n            from_i = from_info[i]\n            to_i = to_info[i]\n            if os.path.abspath(to_i) == os.path.abspath(\n                odb.get(foo.hash_info.value).path\n            ):\n                if on_error:\n                    on_error(from_i, to_i, Exception(\"stop foo\"))\n                del from_info[i]\n                del to_info[i]\n        return original(from_fs, from_info, to_fs, to_info, **kwargs)\n\n    mock_upload = mocker.patch.object(generic, \"transfer\", unreliable_upload)\n    with pytest.raises(UploadError) as upload_error_info:\n        dvc.push()\n    assert upload_error_info.value.amount == 2\n\n    assert not odb.exists(foo.hash_info.value)\n    assert odb.exists(bar.hash_info.value)\n    assert not odb.exists(baz.hash_info.value)\n    mocker.stop(mock_upload)\n\n    # Push everything and delete local cache\n    dvc.push()\n    dvc.cache.local.clear()\n\n    baz._collect_used_dir_cache()\n\n    def unreliable_download(_from_fs, from_info, _to_fs, to_info, 
**kwargs):\n        on_error = kwargs[\"on_error\"]\n        assert on_error\n        if isinstance(from_info, str):\n            from_info = [from_info]\n        if isinstance(to_info, str):\n            to_info = [to_info]\n        for from_i, to_i in zip(from_info, to_info):\n            on_error(from_i, to_i, Exception())\n\n    mock_download = mocker.patch.object(generic, \"transfer\", unreliable_download)\n    with pytest.raises(DownloadError) as download_error_info:\n        dvc.pull()\n    # error count should be len(.dir + standalone file checksums)\n    # since files inside dir are ignored if dir cache entry is missing\n    assert download_error_info.value.amount == 2\n    mocker.stop(mock_download)\n\n\ndef test_raise_on_too_many_open_files(\n    tmp_dir, dvc, tmp_path_factory, mocker, local_remote\n):\n    tmp_dir.dvc_gen({\"file\": \"file content\"})\n\n    mocker.patch(\n        \"dvc_objects.fs.generic.transfer\",\n        side_effect=OSError(errno.EMFILE, \"Too many open files\"),\n    )\n\n    with pytest.raises(OSError, match=\"Too many open files\") as e:\n        dvc.push()\n    assert e.value.errno == errno.EMFILE\n\n\ndef test_modify_missing_remote(tmp_dir, dvc):\n    assert main([\"remote\", \"modify\", \"myremote\", \"user\", \"xxx\"]) == 251\n\n\ndef test_remote_modify_local_on_repo_config(tmp_dir, dvc):\n    assert main([\"remote\", \"add\", \"myremote\", \"http://example.com/path\"]) == 0\n    assert main([\"remote\", \"modify\", \"myremote\", \"user\", \"xxx\", \"--local\"]) == 0\n    assert dvc.config.load_one(\"local\")[\"remote\"][\"myremote\"] == {\"user\": \"xxx\"}\n    assert dvc.config.load_one(\"repo\")[\"remote\"][\"myremote\"] == {\n        \"url\": \"http://example.com/path\"\n    }\n    dvc.config.load()\n    assert dvc.config[\"remote\"][\"myremote\"] == {\n        \"url\": \"http://example.com/path\",\n        \"user\": \"xxx\",\n        \"verify\": False,\n    }\n\n\ndef test_push_order(tmp_dir, dvc, tmp_path_factory, 
mocker, local_remote):\n    from dvc_objects.fs import generic\n\n    foo = tmp_dir.dvc_gen({\"foo\": {\"bar\": \"bar content\"}})[0].outs[0]\n    tmp_dir.dvc_gen({\"baz\": \"baz content\"})\n\n    mocked_upload = mocker.spy(generic, \"transfer\")\n    dvc.push()\n\n    # foo .dir file should be uploaded after bar\n    odb = dvc.cloud.get_remote_odb(\"upstream\")\n    foo_path = odb.oid_to_path(foo.hash_info.value)\n    bar_path = odb.oid_to_path(foo.obj._trie[(\"bar\",)][1].value)\n    paths = list(\n        itertools.chain.from_iterable(\n            args[3] for args, _ in mocked_upload.call_args_list\n        )\n    )\n    assert paths.index(foo_path) > paths.index(bar_path)\n\n\ndef test_remote_modify_validation(dvc):\n    remote_name = \"drive\"\n    unsupported_config = \"unsupported_config\"\n    assert main([\"remote\", \"add\", \"-d\", remote_name, \"gdrive://test/test\"]) == 0\n    assert (\n        main([\"remote\", \"modify\", remote_name, unsupported_config, \"something\"]) == 251\n    )\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert unsupported_config not in config[f'remote \"{remote_name}\"']\n\n\ndef test_remote_modify_unset(dvc):\n    assert main([\"remote\", \"add\", \"-d\", \"myremote\", \"gdrive://test/test\"]) == 0\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert config['remote \"myremote\"'] == {\"url\": \"gdrive://test/test\"}\n\n    assert main([\"remote\", \"modify\", \"myremote\", \"gdrive_client_id\", \"something\"]) == 0\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert config['remote \"myremote\"'] == {\n        \"url\": \"gdrive://test/test\",\n        \"gdrive_client_id\": \"something\",\n    }\n\n    assert main([\"remote\", \"modify\", \"myremote\", \"gdrive_client_id\", \"--unset\"]) == 0\n    config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    assert config['remote \"myremote\"'] == {\"url\": \"gdrive://test/test\"}\n\n\ndef 
test_remote_modify_default(dvc):\n    remote_repo = \"repo_level\"\n    remote_local = \"local_level\"\n    wrong_name = \"anything\"\n    assert main([\"remote\", \"add\", remote_repo, \"s3://bucket/repo\"]) == 0\n    assert main([\"remote\", \"add\", remote_local, \"s3://bucket/local\"]) == 0\n\n    assert main([\"remote\", \"default\", wrong_name]) == 251\n    assert main([\"remote\", \"default\", remote_repo]) == 0\n    assert main([\"remote\", \"default\", \"--local\", remote_local]) == 0\n\n    repo_config = configobj.ConfigObj(dvc.config.files[\"repo\"])\n    local_config = configobj.ConfigObj(dvc.config.files[\"local\"])\n\n    assert repo_config[\"core\"][\"remote\"] == remote_repo\n    assert local_config[\"core\"][\"remote\"] == remote_local\n\n\ndef test_remote_rename(dvc):\n    remote_name = \"drive\"\n    remote_url = \"gdrive://test/test\"\n    new_name = \"new\"\n    other_name = \"other\"\n    # prepare\n    assert main([\"remote\", \"add\", remote_name, remote_url]) == 0\n    config = dvc.config.load_one(\"repo\")\n    assert config[\"remote\"][remote_name][\"url\"] == remote_url\n    assert new_name not in config.get(\"remote\", {})\n\n    # rename failed\n    assert main([\"remote\", \"rename\", remote_name]) == 254\n    assert main([\"remote\", \"rename\", new_name, other_name]) == 251\n    config = dvc.config.load_one(\"repo\")\n    assert config[\"remote\"][remote_name][\"url\"] == remote_url\n    assert new_name not in config.get(\"remote\", {})\n\n    # rename success\n    assert main([\"remote\", \"rename\", remote_name, new_name]) == 0\n    config = dvc.config.load_one(\"repo\")\n    assert remote_name not in config.get(\"remote\", {})\n    assert config[\"remote\"][new_name][\"url\"] == remote_url\n\n\ndef test_remote_duplicated(dvc):\n    remote_name = \"drive\"\n    remote_url = \"gdrive://test/test\"\n    used_name = \"overlap\"\n    another_url = \"gdrive://test/test1\"\n    # prepare\n    assert main([\"remote\", \"add\", 
remote_name, remote_url]) == 0\n    assert main([\"remote\", \"add\", \"--local\", used_name, another_url]) == 0\n    config = dvc.config.load_one(\"repo\")\n    assert config[\"remote\"][remote_name][\"url\"] == remote_url\n    local_config = dvc.config.load_one(\"local\")\n    assert local_config[\"remote\"][used_name][\"url\"] == another_url\n\n    # rename duplicated\n    assert main([\"remote\", \"rename\", remote_name, used_name]) == 251\n    config = dvc.config.load_one(\"repo\")\n    assert config[\"remote\"][remote_name][\"url\"] == remote_url\n    local_config = dvc.config.load_one(\"local\")\n    assert local_config[\"remote\"][used_name][\"url\"] == another_url\n\n\ndef test_remote_default(dvc):\n    remote_name = \"drive\"\n    remote_url = \"gdrive://test/test\"\n    new_name = \"new\"\n    # prepare\n    assert main([\"remote\", \"add\", \"-d\", remote_name, remote_url]) == 0\n    assert main([\"remote\", \"default\", \"--local\", remote_name]) == 0\n    config = dvc.config.load_one(\"repo\")\n    assert config[\"core\"][\"remote\"] == remote_name\n    assert config[\"remote\"][remote_name][\"url\"] == remote_url\n    assert new_name not in config.get(\"remote\", {})\n    local_config = dvc.config.load_one(\"local\")\n    assert local_config[\"core\"][\"remote\"] == remote_name\n\n    # rename success\n    assert main([\"remote\", \"rename\", remote_name, new_name]) == 0\n    config = dvc.config.load_one(\"repo\")\n    assert remote_name not in config.get(\"remote\", {})\n    assert config[\"core\"][\"remote\"] == new_name\n    assert config[\"remote\"][new_name][\"url\"] == remote_url\n    assert remote_name not in config.get(\"remote\", {})\n    local_config = dvc.config.load_one(\"local\")\n    assert local_config[\"core\"][\"remote\"] == new_name\n\n\ndef test_protect_local_remote(tmp_dir, dvc, local_remote):\n    (stage,) = tmp_dir.dvc_gen(\"file\", \"file content\")\n\n    dvc.push()\n    odb = dvc.cloud.get_remote_odb(\"upstream\")\n    
remote_cache_file = odb.oid_to_path(stage.outs[0].hash_info.value)\n\n    assert os.path.exists(remote_cache_file)\n    assert stat.S_IMODE(os.stat(remote_cache_file).st_mode) == 0o444\n\n\ndef test_push_incomplete_dir(tmp_dir, dvc, mocker, local_remote):\n    (stage,) = tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    remote_odb = dvc.cloud.get_remote_odb(\"upstream\")\n\n    odb = dvc.cache.local\n    out = stage.outs[0]\n    file_objs = [entry_obj for _, _, entry_obj in out.obj]\n\n    # remove one of the cache files for directory\n    remove(odb.oid_to_path(file_objs[0].value))\n\n    dvc.push()\n    assert not remote_odb.exists(out.hash_info.value)\n    assert not remote_odb.exists(file_objs[0].value)\n    assert remote_odb.exists(file_objs[1].value)\n"
  },
  {
    "path": "tests/func/test_remove.py",
    "content": "import os\nimport re\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.fs import system\nfrom dvc.stage.exceptions import StageFileDoesNotExistError, StageFileIsNotDvcFileError\nfrom dvc.utils.fs import remove\nfrom dvc_objects.errors import ObjectDBError\nfrom tests.utils import get_gitignore_content\n\n\n@pytest.mark.parametrize(\"remove_outs\", [True, False])\ndef test_remove(tmp_dir, scm, dvc, run_copy, remove_outs):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage2 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    stage3 = run_copy(\"bar\", \"foobar\", name=\"copy-bar-foobar\")\n\n    assert \"/foo\" in get_gitignore_content()\n    assert \"/bar\" in get_gitignore_content()\n    assert \"/foobar\" in get_gitignore_content()\n\n    for stage in [stage1, stage2, stage3]:\n        dvc.remove(stage.addressing, outs=remove_outs)\n        out_exists = (out.exists for out in stage.outs)\n        assert stage not in dvc.index.stages\n        if remove_outs:\n            assert not any(out_exists)\n        else:\n            assert all(out_exists)\n\n    assert not (tmp_dir / \".gitignore\").exists()\n\n\ndef test_remove_file_target(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    with pytest.raises(\n        StageFileIsNotDvcFileError,\n        match=re.escape(\"'foo' is not a .dvc file. 
Do you mean 'foo.dvc'?\"),\n    ):\n        dvc.remove(\"foo\")\n\n    dvc.remove(\"foo.dvc\")\n\n\ndef test_remove_non_existent_file(tmp_dir, dvc):\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.remove(\"non_existent_dvc_file.dvc\")\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.remove(\"non_existent_stage_name\")\n\n\ndef test_remove_broken_symlink(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.cache.local.cache_types = [\"symlink\"]\n\n    (stage,) = dvc.add(\"foo\")\n    remove(dvc.cache.local.path)\n    assert system.is_symlink(\"foo\")\n\n    with pytest.raises(ObjectDBError):\n        dvc.remove(stage.addressing)\n    assert os.path.lexists(\"foo\")\n    assert (tmp_dir / stage.relpath).exists()\n\n    dvc.remove(stage.addressing, outs=True)\n    assert not os.path.lexists(\"foo\")\n    assert not (tmp_dir / stage.relpath).exists()\n\n\ndef test_cmd_remove(tmp_dir, dvc):\n    assert main([\"remove\", \"non-existing-dvc-file\"]) == 1\n\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert main([\"remove\", stage.addressing]) == 0\n    assert not (tmp_dir / stage.relpath).exists()\n    assert (tmp_dir / \"foo\").exists()\n\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert main([\"remove\", stage.addressing, \"--outs\"]) == 0\n    assert not (tmp_dir / stage.relpath).exists()\n    assert not (tmp_dir / \"foo\").exists()\n\n\ndef test_cmd_remove_gitignore_single_stage(tmp_dir, scm, dvc, run_copy):\n    stage = dvc.run(name=\"my\", cmd='echo \"hello\" > out', deps=[], outs=[\"out\"])\n\n    assert (tmp_dir / \".gitignore\").exists()\n\n    assert main([\"remove\", stage.addressing]) == 0\n    assert not (tmp_dir / stage.relpath).exists()\n    assert not (stage.dvcfile._lockfile).exists()\n    assert not (tmp_dir / \".gitignore\").exists()\n\n\ndef test_cmd_remove_gitignore_multistage(tmp_dir, scm, dvc, run_copy):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage1 = run_copy(\"foo\", 
\"foo1\", name=\"copy-foo-foo1\")\n    stage2 = run_copy(\"foo1\", \"foo2\", name=\"copy-foo1-foo2\")\n\n    assert (tmp_dir / \".gitignore\").exists()\n\n    assert main([\"remove\", stage2.addressing]) == 0\n    assert main([\"remove\", stage1.addressing]) == 0\n    assert main([\"remove\", stage.addressing]) == 0\n    assert not (tmp_dir / \".gitignore\").exists()\n"
  },
  {
    "path": "tests/func/test_repo.py",
    "content": "import os\n\nfrom dulwich.porcelain import push as git_push\nfrom dulwich.porcelain import remote_add as git_remote_add\n\nfrom dvc.cachemgr import CacheManager\nfrom dvc.dvcfile import LOCK_FILE, PROJECT_FILE\nfrom dvc.fs import system\nfrom dvc.repo import Repo\nfrom dvc.scm import Git\n\n\ndef test_open_bare(tmp_dir, scm, dvc, tmp_path_factory):\n    tmp_dir.dvc_gen(\n        {\n            \"dir123\": {\"foo\": \"foo content\"},\n            \"dirextra\": {\"extrafoo\": \"extra foo content\"},\n        },\n        commit=\"initial\",\n    )\n\n    url = os.fspath(tmp_path_factory.mktemp(\"bare\"))\n    Git.init(url, bare=True).close()\n\n    git_remote_add(tmp_dir, \"origin\", url)\n    git_push(tmp_dir, \"origin\")\n\n    with Repo.open(url) as repo:\n        assert repo.scm.root_dir != url\n\n    with Repo.open(url, uninitialized=True) as repo:\n        assert repo.scm.root_dir != url\n\n\ndef test_destroy(tmp_dir, dvc, run_copy):\n    dvc.config[\"cache\"][\"type\"] = [\"symlink\"]\n    dvc.cache = CacheManager(dvc)\n\n    tmp_dir.dvc_gen(\"file\", \"text\")\n    tmp_dir.dvc_gen({\"dir\": {\"file\": \"lorem\", \"subdir/file\": \"ipsum\"}})\n\n    run_copy(\"file\", \"file2\", name=\"copy-file-file2\")\n    run_copy(\"file2\", \"file3\", name=\"copy-file2-file3\")\n    run_copy(\"file3\", \"file4\", name=\"copy-file3-file4\")\n\n    dvc.destroy()\n\n    # Remove all the files related to DVC\n    for path in [\n        \".dvc\",\n        \".dvcignore\",\n        \"file.dvc\",\n        \"dir.dvc\",\n        PROJECT_FILE,\n        LOCK_FILE,\n    ]:\n        assert not (tmp_dir / path).exists()\n\n    # Leave the rest of the files\n    for path in [\n        \"file\",\n        \"file2\",\n        \"file3\",\n        \"file4\",\n        \"dir/file\",\n        \"dir/subdir/file\",\n    ]:\n        assert (tmp_dir / path).is_file()\n\n    # Make sure that data was unprotected after `destroy`\n    for path in [\n        \"file\",\n        
\"file2\",\n        \"file3\",\n        \"file4\",\n        \"dir\",\n        \"dir/file\",\n        \"dir/subdir\",\n        \"dir/subdir/file\",\n    ]:\n        assert not system.is_symlink(tmp_dir / path)\n"
  },
  {
    "path": "tests/func/test_repo_index.py",
    "content": "import os\nfrom itertools import chain\n\nimport pytest\nfrom pygtrie import Trie\n\nfrom dvc.exceptions import NoOutputOrStageError\nfrom dvc.repo.index import Index, index_from_targets\nfrom dvc.stage import Stage\n\n\ndef test_index(tmp_dir, scm, dvc, run_copy):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage2 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    tmp_dir.commit([s.outs[0].fspath for s in (stage1, stage2)], msg=\"add\")\n\n    index = Index.from_repo(dvc)\n\n    assert set(index.stages) == {stage1, stage2}\n    assert index.outs_graph\n    assert index.graph\n    assert isinstance(index.outs_trie, Trie)\n    index.check_graph()\n\n\ndef test_repr(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\", commit=\"add foo\")\n\n    brancher = dvc.brancher([scm.get_rev()])\n    rev = next(brancher)\n    assert rev == \"workspace\"\n    assert repr(Index(dvc)) == f\"Index({dvc}, fs@{rev})\"\n\n    rev = next(brancher)\n    assert rev == scm.get_rev()\n    assert repr(Index(dvc)) == f\"Index({dvc}, fs@{rev[:7]})\"\n\n\ndef outputs_equal(actual, expected):\n    actual, expected = list(actual), list(expected)\n\n    def sort_fn(output):\n        return output.fspath\n\n    assert len(actual) == len(expected)\n    pairs = zip(sorted(actual, key=sort_fn), sorted(expected, key=sort_fn))\n    assert all(actual.fspath == expected.fspath for actual, expected in pairs)\n    return True\n\n\ndef test_deps_outs_getters(tmp_dir, dvc, run_copy_metrics):\n    (foo_stage,) = tmp_dir.dvc_gen({\"foo\": \"foo\"})\n    tmp_dir.gen({\"params.yaml\": \"param: 100\\n\"})\n    tmp_dir.gen({\"m_temp.yaml\": str(5)})\n\n    run_stage1 = run_copy_metrics(\n        \"m_temp.yaml\",\n        \"m.yaml\",\n        metrics=[\"m.yaml\"],\n        params=[\"param\"],\n        name=\"copy-metrics\",\n    )\n    (tmp_dir / \"metric_t.json\").dump_json(\n        [{\"a\": 1, \"b\": 2}, {\"a\": 2, \"b\": 3}], sort_keys=True\n    )\n    run_stage2 = 
run_copy_metrics(\n        \"metric_t.json\",\n        \"metric.json\",\n        plots_no_cache=[\"metric.json\"],\n        name=\"copy-metrics2\",\n    )\n\n    index = Index.from_repo(dvc)\n\n    stages = [foo_stage, run_stage1, run_stage2]\n    (metrics,) = run_stage1.outs\n    _, params = run_stage1.deps\n    (plots,) = run_stage2.outs\n\n    expected_outs = chain.from_iterable([stage.outs for stage in stages])\n    expected_deps = chain.from_iterable([stage.deps for stage in stages])\n\n    assert outputs_equal(index.outs, expected_outs)\n    assert outputs_equal(index.deps, expected_deps)\n    assert outputs_equal(index.decorated_outs, [metrics, plots])\n    assert outputs_equal(index.metrics, [metrics])\n    assert outputs_equal(index.plots, [plots])\n    assert outputs_equal(index.params, [params])\n\n\ndef test_update(dvc):\n    \"\"\"Test that update overwrites existing stages with the new ones.\n\n    The old stages and the new ones might have same hash, so we are\n    making sure that the old stages were removed and replaced by new ones\n    using `id`/`is` checks.\n    \"\"\"\n    index = Index.from_repo(dvc)\n    new_stage = Stage(dvc, path=\"path1\")\n    new_index = index.update({new_stage})\n\n    assert not index.stages\n    assert new_index.stages == [new_stage]\n\n    dup_stage1 = Stage(dvc, path=\"path1\")\n    dup_stage2 = Stage(dvc, path=\"path2\")\n    dup_index = index.update([dup_stage1, dup_stage2])\n    assert not index.stages\n    assert len(new_index.stages) == 1\n    assert new_index.stages[0] is new_stage\n    assert set(map(id, dup_index.stages)) == {id(dup_stage1), id(dup_stage2)}\n\n\ndef assert_index_equal(first, second, strict=True, ordered=True):\n    assert len(first) == len(second), \"Index have different no. 
of stages\"\n    assert set(first) == set(second), \"Index does not have same stages\"\n    if ordered:\n        assert list(first) == list(second), (\n            \"Index does not have same sequence of stages\"\n        )\n    if strict:\n        assert set(map(id, first)) == set(map(id, second)), (\n            \"Index is not strictly equal\"\n        )\n\n\ndef test_skip_graph_checks(dvc, mocker):\n    # See https://github.com/treeverse/dvc/issues/2671 for more info\n    mock_build_graph = mocker.spy(Index.graph, \"fget\")\n\n    # sanity check\n    Index(dvc).check_graph()\n    assert mock_build_graph.called\n    mock_build_graph.reset_mock()\n\n    # check that our hack can be enabled\n    dvc._skip_graph_checks = True\n    Index(dvc).check_graph()\n    assert not mock_build_graph.called\n    mock_build_graph.reset_mock()\n\n    # check that our hack can be disabled\n    dvc._skip_graph_checks = False\n    Index(dvc).check_graph()\n    assert mock_build_graph.called\n\n\ndef test_used_objs(tmp_dir, scm, dvc, run_copy):\n    from dvc_data.hashfile.hash_info import HashInfo\n\n    dvc.scm_context.autostage = True\n    tmp_dir.dvc_gen({\"dir\": {\"subdir\": {\"file\": \"file\"}}, \"foo\": \"foo\"})\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    scm.commit(\"commit\")\n\n    for _ in dvc.brancher(revs=[\"HEAD\"]):\n        expected_objs = [\n            HashInfo(\n                name=\"md5\",\n                value=\"acbd18db4cc2f85cedef654fccc4a4d8\",\n                obj_name=\"bar\",\n            ),\n            HashInfo(\n                name=\"md5\",\n                value=\"8c7dd922ad47494fc02c388e12c00eac\",\n                obj_name=\"dir/subdir/file\",\n            ),\n            HashInfo(\n                name=\"md5\",\n                value=\"d28c9e28591aeb7e303dc6772ffa6f6b.dir\",\n                obj_name=\"dir\",\n            ),\n        ]\n\n        assert dvc.index.used_objs() == {None: set(expected_objs)}\n        assert 
dvc.index.used_objs(\"dir\") == {None: set(expected_objs[1:])}\n        assert dvc.index.used_objs(\".\", recursive=True) == {None: set(expected_objs)}\n        assert dvc.index.used_objs(\"copy-foo-bar\", with_deps=True) == {\n            None: {expected_objs[0]}\n        }\n\n\ndef test_view_granular_dir(tmp_dir, scm, dvc, run_copy):\n    tmp_dir.dvc_gen(\n        {\"dir\": {\"subdir\": {\"in-subdir\": \"in-subdir\"}, \"in-dir\": \"in-dir\"}},\n        commit=\"init\",\n    )\n    index = Index.from_repo(dvc)\n\n    # view should include the specific target, parent dirs, and children\n    # view should exclude any siblings of the target\n    view = index.targets_view(\"dir/subdir\")\n\n    assert view.data_keys == {\"repo\": {(\"dir\", \"subdir\")}}\n\n    data_index = view.data[\"repo\"]\n    assert (\"dir\",) in data_index\n    assert (\"dir\", \"subdir\") in data_index\n    assert (\"dir\", \"subdir\", \"in-subdir\") in data_index\n    assert (\"dir\", \"in-dir\") not in data_index\n\n\ndef test_view_onerror(tmp_dir, scm, dvc):\n    from dvc.exceptions import NoOutputOrStageError\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"init\")\n    index = Index.from_repo(dvc)\n\n    with pytest.raises(NoOutputOrStageError):\n        index.targets_view([\"foo\", \"missing\"])\n\n    failed = []\n\n    def onerror(target, exc):\n        failed.append((target, exc))\n\n    view = index.targets_view([\"foo\", \"missing\"], onerror=onerror)\n    data = view.data[\"repo\"]\n\n    assert len(failed) == 1\n    target, exc = failed[0]\n    assert target == \"missing\"\n    assert isinstance(exc, NoOutputOrStageError)\n    assert len(data) == 1\n    assert data[(\"foo\",)]\n\n\ndef test_view_stage_filter(tmp_dir, scm, dvc, run_copy):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage2 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    tmp_dir.commit([s.outs[0].fspath for s in (stage1, stage2)], msg=\"add\")\n    index = Index.from_repo(dvc)\n\n    view = 
index.targets_view(None)\n    assert set(view.stages) == {stage1, stage2}\n    assert {out.fs_path for out in view.outs} == {\n        out.fs_path for out in (stage1.outs + stage2.outs)\n    }\n\n    view = index.targets_view(\n        None, stage_filter=lambda s: getattr(s, \"name\", \"\").startswith(\"copy\")\n    )\n    assert set(view.stages) == {stage2}\n    assert {out.fs_path for out in view.outs} == {out.fs_path for out in stage2.outs}\n\n\ndef test_view_outs_filter(tmp_dir, scm, dvc, run_copy):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage2 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    tmp_dir.commit([s.outs[0].fspath for s in (stage1, stage2)], msg=\"add\")\n    index = Index.from_repo(dvc)\n\n    view = index.targets_view(None, outs_filter=lambda o: o.def_path == \"foo\")\n    assert set(view.stages) == {stage1, stage2}\n    assert {out.fs_path for out in view.outs} == {out.fs_path for out in stage1.outs}\n\n\ndef test_view_combined_filter(tmp_dir, scm, dvc, run_copy):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage2 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    tmp_dir.commit([s.outs[0].fspath for s in (stage1, stage2)], msg=\"add\")\n    index = Index.from_repo(dvc)\n\n    view = index.targets_view(\n        None,\n        stage_filter=lambda s: getattr(s, \"name\", \"\").startswith(\"copy\"),\n        outs_filter=lambda o: o.def_path == \"foo\",\n    )\n    assert set(view.stages) == {stage2}\n    assert set(view.outs) == set()\n\n    view = index.targets_view(\n        None,\n        stage_filter=lambda s: getattr(s, \"name\", \"\").startswith(\"copy\"),\n        outs_filter=lambda o: o.def_path == \"bar\",\n    )\n    assert set(view.stages) == {stage2}\n    assert {out.fs_path for out in view.outs} == {out.fs_path for out in stage2.outs}\n\n\ndef test_view_brancher(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"init\")\n    index = Index.from_repo(dvc)\n\n    for _ in 
dvc.brancher(revs=[\"HEAD\"]):\n        view = index.targets_view(\"foo\")\n        data = view.data[\"repo\"]\n        assert data[(\"foo\",)]\n\n\ndef test_with_gitignore(tmp_dir, dvc, scm):\n    (stage,) = tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    index = Index.from_repo(dvc)\n    assert index.stages == [stage]\n\n    scm.ignore(stage.path)\n    scm._reset()\n\n    index = Index.from_repo(dvc)\n    assert not index.stages\n\n\ndef test_ignored_dir_unignored_pattern(tmp_dir, dvc, scm):\n    tmp_dir.gen({\".gitignore\": \"data/**\\n!data/**/\\n!data/**/*.dvc\"})\n    scm.add([\".gitignore\"])\n    (stage,) = tmp_dir.dvc_gen({\"data/raw/tracked.csv\": \"5,6,7,8\"})\n    index = Index.from_repo(dvc)\n    assert index.stages == [stage]\n\n\ndef test_param_keys_returns_default_file(tmp_dir, dvc):\n    tmp_dir.gen({\"params.yaml\": \"param: 100\\n\"})\n    index = Index.from_repo(dvc)\n    assert index.param_keys == {\"repo\": {(\"params.yaml\",)}}\n\n\ndef test_param_keys_no_params(dvc):\n    index = Index.from_repo(dvc)\n    assert index.param_keys == {\"repo\": set()}\n\n\ndef test_param_keys_top_level_params(tmp_dir, dvc):\n    params_file_path = \"classifier/custom_params_file.yaml\"\n    top_level_params = f\"\"\"\nparams:\n  - {params_file_path}\n    \"\"\"\n    tmp_dir.gen(params_file_path, \"param: 100\\n\")\n    tmp_dir.gen(\"dvc.yaml\", top_level_params)\n    index = Index.from_repo(dvc)\n    assert index.param_keys == {\"repo\": {(\"classifier\", \"custom_params_file.yaml\")}}\n\n\ndef test_data_index(tmp_dir, dvc, local_cloud, erepo_dir):\n    tmp_dir.dvc_gen({\"foo\": b\"foo\", \"dir\": {\"bar\": b\"bar\", \"subdir\": {\"baz\": b\"baz\"}}})\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"efoo\", b\"efoo\", commit=\"create efoo\")\n        erepo_dir.dvc_gen(\n            \"edir\",\n            {\"ebar\": b\"ebar\", \"esubdir\": {\"ebaz\": b\"ebaz\"}},\n            commit=\"create edir\",\n        )\n\n    
dvc.imp(os.fspath(erepo_dir), \"efoo\")\n    dvc.imp(os.fspath(erepo_dir), \"edir\")\n\n    dvc.imp(os.fspath(erepo_dir), \"efoo\", \"efoo_partial\", no_download=True)\n    dvc.imp(os.fspath(erepo_dir), \"edir\", \"edir_partial\", no_download=True)\n\n    local_cloud.gen(\"ifoo\", b\"ifoo\")\n    local_cloud.gen(\"idir\", {\"ibar\": b\"ibar\", \"isubdir\": {\"ibaz\": b\"ibaz\"}})\n\n    dvc.imp_url(str(local_cloud / \"ifoo\"))\n    dvc.imp_url(str(local_cloud / \"idir\"))\n\n    dvc.imp_url(str(local_cloud / \"ifoo\"), \"ifoo_partial\", no_download=True)\n    dvc.imp_url(str(local_cloud / \"idir\"), \"idir_partial\", no_download=True)\n\n    index = Index.from_repo(dvc)\n    assert index.data_keys == {\n        \"local\": set(),\n        \"repo\": {\n            (\"foo\",),\n            (\"dir\",),\n            (\"efoo\",),\n            (\"edir\",),\n            (\"efoo_partial\",),\n            (\"edir_partial\",),\n            (\"ifoo\",),\n            (\"idir\",),\n            (\"ifoo_partial\",),\n            (\"idir_partial\",),\n        },\n    }\n\n    data = index.data[\"repo\"]\n    assert set(data.keys()) == {\n        (\"foo\",),\n        (\"dir\",),\n        (\"efoo\",),\n        (\"edir\",),\n        (\"efoo_partial\",),\n        (\"edir_partial\",),\n        (\"ifoo\",),\n        (\"idir\",),\n        (\"ifoo_partial\",),\n        (\"idir_partial\",),\n    }\n\n    assert not data.storage_map[(\"foo\",)].remote\n    assert not data.storage_map[(\"dir\",)].remote\n\n    assert data.storage_map[(\"efoo\",)].remote.read_only\n    assert data.storage_map[(\"edir\",)].remote.read_only\n\n    assert data.storage_map[(\"efoo_partial\",)].remote.read_only\n    assert data.storage_map[(\"edir_partial\",)].remote.read_only\n\n    assert not data.storage_map[(\"ifoo\",)].remote\n    assert not data.storage_map[(\"idir\",)].remote\n\n    assert data.storage_map[(\"ifoo_partial\",)].remote.read_only\n    assert 
data.storage_map[(\"idir_partial\",)].remote.read_only\n\n\ndef test_index_from_targets(tmp_dir, dvc):\n    stage1 = dvc.stage.add(name=\"stage1\", cmd=\"echo hello\")\n    stage2 = dvc.stage.add(name=\"stage2\", cmd=\"echo hello world\")\n\n    (foo_stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    index = index_from_targets(dvc, [\"stage1\"])\n    assert index.stages == [stage1]\n\n    index = index_from_targets(dvc, [\"stage2\"])\n    assert index.stages == [stage2]\n\n    index = index_from_targets(dvc, [\"dvc.yaml\"])\n    assert set(index.stages) == {stage1, stage2}\n\n    index = index_from_targets(dvc, [\"dvc.yaml:stage2\"])\n    assert index.stages == [stage2]\n\n    index = index_from_targets(dvc, [\"foo.dvc\"])\n    assert index.stages == [foo_stage]\n\n    index = index_from_targets(dvc, [\"stage1\", \"foo.dvc\"])\n    assert set(index.stages) == {foo_stage, stage1}\n\n    with pytest.raises(NoOutputOrStageError):\n        index = index_from_targets(dvc, [\"not-existing-stage\"])\n"
  },
  {
    "path": "tests/func/test_root.py",
    "content": "from dvc.cli import main\n\n\ndef test_root(tmp_dir, dvc, capsys):\n    assert main([\"root\"]) == 0\n    assert \".\" in capsys.readouterr()[0]\n\n\ndef test_root_locked(tmp_dir, dvc, capsys):\n    # NOTE: check that `dvc root` is not blocked with dvc lock\n    with dvc.lock:\n        assert main([\"root\"]) == 0\n    assert \".\" in capsys.readouterr()[0]\n"
  },
  {
    "path": "tests/func/test_run.py",
    "content": "import logging\nimport os\nimport textwrap\nimport uuid\n\nimport pytest\nfrom funcy import get_in\n\nfrom dvc.cli import main\nfrom dvc.dependency import ParamsDependency\nfrom dvc.dependency.base import DependencyDoesNotExistError\nfrom dvc.dvcfile import LOCK_FILE, PROJECT_FILE, load_file\nfrom dvc.exceptions import (\n    ArgumentDuplicationError,\n    CircularDependencyError,\n    CyclicGraphError,\n    InvalidArgumentError,\n    OutputDuplicationError,\n    OverlappingOutputPathsError,\n)\nfrom dvc.stage import PipelineStage\nfrom dvc.stage.exceptions import (\n    DuplicateStageName,\n    InvalidStageName,\n    StagePathNotDirectoryError,\n    StagePathNotFoundError,\n    StagePathOutsideError,\n)\nfrom dvc.utils.serialize import load_yaml\n\n\ndef test_run(tmp_dir, dvc, copy_script):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert not os.path.exists(PROJECT_FILE)\n    stage = dvc.run(\n        cmd=\"python copy.py foo bar\",\n        deps=[\"foo\", \"copy.py\"],\n        outs=[\"bar\"],\n        name=\"copy-foo-to-bar\",\n    )\n    assert isinstance(stage, PipelineStage)\n    assert stage.name == \"copy-foo-to-bar\"\n    assert os.path.exists(PROJECT_FILE)\n    assert os.path.exists(LOCK_FILE)\n    assert stage.cmd == \"python copy.py foo bar\"\n    assert len(stage.deps) == 2\n    assert len(stage.outs) == 1\n\n    with pytest.raises(OutputDuplicationError):\n        dvc.run(\n            cmd=\"python copy.py foo bar\",\n            deps=[\"foo\", \"copy.py\"],\n            outs=[\"bar\"],\n            name=\"duplicate\",\n        )\n\n\ndef test_run_empty(dvc):\n    dvc.run(cmd=\"echo hello world\", deps=[], outs=[], outs_no_cache=[], name=\"empty\")\n\n\ndef test_run_missing_dep(dvc):\n    with pytest.raises(DependencyDoesNotExistError):\n        dvc.run(\n            cmd=\"command\",\n            deps=[\"non-existing-dep\"],\n            outs=[],\n            outs_no_cache=[],\n            name=\"missing-dep\",\n        )\n\n\ndef 
test_run_no_exec(tmp_dir, dvc, scm, run_copy):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert not os.path.exists(PROJECT_FILE)\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-to-bar\", no_exec=True)\n    assert isinstance(stage, PipelineStage)\n    assert stage.name == \"copy-foo-to-bar\"\n    assert os.path.exists(PROJECT_FILE)\n    assert not os.path.exists(LOCK_FILE)\n    assert not os.path.exists(\"bar\")\n\n    data, _ = stage.dvcfile._load()\n    assert data[\"stages\"][\"copy-foo-to-bar\"] == {\n        \"cmd\": \"python copy.py foo bar\",\n        \"deps\": [\"copy.py\", \"foo\"],\n        \"outs\": [\"bar\"],\n    }\n    with open(\".gitignore\", encoding=\"utf-8\") as fobj:\n        assert fobj.read() == \"/foo\\n/bar\\n\"\n\n\ndef test_run_repeat(tmp_dir, dvc, run_copy):\n    from dvc.dvcfile import PROJECT_FILE, load_file\n    from dvc.stage import PipelineStage\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"foo1\", name=\"copy-foo-foo1\")\n    run_copy(\"foo1\", \"foo2\", name=\"copy-foo1-foo2\")\n    run_copy(\"foo2\", \"foo3\", name=\"copy-foo2-foo3\")\n\n    stages = list(load_file(dvc, PROJECT_FILE).stages.values())\n    assert len(stages) == 3\n    assert all(isinstance(stage, PipelineStage) for stage in stages)\n    assert {stage.name for stage in stages} == {\n        \"copy-foo-foo1\",\n        \"copy-foo1-foo2\",\n        \"copy-foo2-foo3\",\n    }\n\n\ndef test_run_cached(tmp_dir, dvc, run_copy, mocker):\n    from dvc.stage.run import subprocess\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    run_copy(\"foo\", \"foo2\", name=\"copy-foo1-foo2\")\n    spy = mocker.spy(subprocess, \"Popen\")\n    run_copy(\"foo\", \"foo2\", name=\"copy-foo1-foo2\")\n    assert not spy.called\n\n\ndef test_dump_on_non_cached_outputs(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\")\n    dvc.run(\n        cmd=\"cp foo foo1\",\n        deps=[\"foo\"],\n        name=\"copy-foo1-foo2\",\n        outs_no_cache=[\"foo1\"],\n    )\n\n\ndef 
test_with_wdir(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    stage = dvc.run(\n        cmd=\"cp foo foo1\",\n        deps=[\"foo\"],\n        name=\"copy-foo1-foo2\",\n        outs=[\"foo1\"],\n        wdir=\"dir\",\n    )\n\n    data, _ = load_file(dvc, stage.path)._load()\n    assert data[\"stages\"][\"copy-foo1-foo2\"][\"wdir\"] == \"dir\"\n\n\ndef test_always_changed(tmp_dir, dvc):\n    from dvc.dvcfile import load_file\n\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    stage = dvc.run(\n        cmd=\"cp foo foo1\",\n        deps=[\"foo\"],\n        name=\"copy-foo1-foo2\",\n        outs=[\"foo1\"],\n        always_changed=True,\n    )\n\n    data, _ = load_file(dvc, stage.path)._load()\n    assert data[\"stages\"][\"copy-foo1-foo2\"][\"always_changed\"]\n\n\ndef test_graph(tmp_dir, dvc):\n    from dvc.exceptions import CyclicGraphError\n\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n\n    dvc.run(deps=[\"foo\"], outs=[\"bar\"], cmd=\"echo foo > bar\", name=\"1\")\n\n    dvc.run(deps=[\"bar\"], outs=[\"baz\"], cmd=\"echo bar > baz\", name=\"2\")\n\n    with pytest.raises(CyclicGraphError):\n        dvc.run(deps=[\"baz\"], outs=[\"foo\"], cmd=\"echo baz > foo\", name=\"3\")\n\n\nclass TestRunCircularDependency:\n    def test(self, dvc):\n        with pytest.raises(CircularDependencyError):\n            dvc.run(\n                cmd=\"command\",\n                deps=[\"foo\"],\n                outs=[\"foo\"],\n                name=\"circular-dependency\",\n            )\n\n    def test_outs_no_cache(self, dvc):\n        with pytest.raises(CircularDependencyError):\n            dvc.run(\n                cmd=\"command\",\n                deps=[\"foo\"],\n                outs_no_cache=[\"foo\"],\n                name=\"circular-dependency\",\n            )\n\n    def test_non_normalized_paths(self, dvc):\n        with pytest.raises(CircularDependencyError):\n            dvc.run(\n                
cmd=\"command\",\n                deps=[\"./foo\"],\n                outs=[\"foo\"],\n                name=\"circular-dependency\",\n            )\n\n    def test_graph(self, tmp_dir, dvc):\n        tmp_dir.gen(\"foo\", \"foo\")\n        dvc.run(\n            deps=[\"foo\"],\n            outs=[\"bar.txt\"],\n            cmd=\"echo bar > bar.txt\",\n            name=\"gen-bar-txt\",\n        )\n\n        dvc.run(\n            deps=[\"bar.txt\"],\n            outs=[\"baz.txt\"],\n            cmd=\"echo baz > baz.txt\",\n            name=\"gen-baz-txt\",\n        )\n\n        with pytest.raises(CyclicGraphError):\n            dvc.run(\n                deps=[\"baz.txt\"],\n                outs=[\"foo\"],\n                cmd=\"echo baz > foo\",\n                name=\"gen-foo\",\n            )\n\n\nclass TestRunDuplicatedArguments:\n    def test(self, dvc):\n        with pytest.raises(ArgumentDuplicationError):\n            dvc.run(\n                cmd=\"command\",\n                deps=[],\n                outs=[\"foo\", \"foo\"],\n                name=\"circular-dependency\",\n            )\n\n    def test_outs_no_cache(self, dvc):\n        with pytest.raises(ArgumentDuplicationError):\n            dvc.run(\n                cmd=\"command\",\n                outs=[\"foo\"],\n                outs_no_cache=[\"foo\"],\n                name=\"circular-dependency\",\n            )\n\n    def test_non_normalized_paths(self, dvc):\n        with pytest.raises(ArgumentDuplicationError):\n            dvc.run(\n                cmd=\"command\",\n                deps=[],\n                outs=[\"foo\", \"./foo\"],\n                name=\"circular-dependency\",\n            )\n\n\nclass TestRunBadWdir:\n    def test(self, make_tmp_dir, dvc):\n        with pytest.raises(StagePathOutsideError):\n            dvc.run(cmd=\"command\", wdir=make_tmp_dir(\"tmp\"), name=\"bad-wdir\")\n\n    def test_same_prefix(self, tmp_dir, dvc):\n        path = f\"{tmp_dir}-{uuid.uuid4()}\"\n        
os.mkdir(path)\n        with pytest.raises(StagePathOutsideError):\n            dvc.run(cmd=\"command\", wdir=path, name=\"bad-wdir\")\n\n    def test_not_found(self, tmp_dir, dvc):\n        path = os.path.join(tmp_dir, str(uuid.uuid4()))\n        with pytest.raises(StagePathNotFoundError):\n            dvc.run(cmd=\"command\", wdir=path, name=\"bad-wdir\")\n\n    def test_not_dir(self, tmp_dir, dvc):\n        path = tmp_dir / str(uuid.uuid4())\n        path.mkdir()\n        path = path / str(uuid.uuid4())\n        path.touch()\n        with pytest.raises(StagePathNotDirectoryError):\n            dvc.run(cmd=\"command\", wdir=os.fspath(path), name=\"bad-wdir\")\n\n\nclass TestCmdRunWorkingDirectory:\n    def test_default_wdir_is_not_written(self, tmp_dir, dvc):\n        dvc.run(cmd=\"echo test > foo\", outs=[\"foo\"], wdir=\".\", name=\"echo-foo\")\n\n        d = load_yaml(\"dvc.yaml\")\n        assert \"wdir\" not in get_in(d, [\"stages\", \"echo-foo\"])\n\n        dvc.run(cmd=\"echo test > bar\", outs=[\"bar\"], name=\"echo-bar\")\n        d = load_yaml(\"dvc.yaml\")\n        assert \"wdir\" not in get_in(d, [\"stages\", \"echo-bar\"])\n\n    def test_fname_changes_path_and_wdir(self, tmp_dir, dvc):\n        dirpath = tmp_dir / \"dir\"\n        dirpath.mkdir()\n\n        with dirpath.chdir():\n            stage = dvc.run(\n                cmd=\"echo test > foo\",\n                outs=[\"foo\"],\n                wdir=os.fspath(tmp_dir),\n                name=\"echo\",\n            )\n        assert stage.wdir == os.path.realpath(tmp_dir)\n\n        # Check that it is dumped properly\n        d = load_yaml(\"dir/dvc.yaml\")\n        assert get_in(d, [\"stages\", \"echo\", \"wdir\"]) == \"..\"\n\n\ndef test_run_dump(tmp_dir, dvc, run_head):\n    from dvc.dvcfile import load_file\n\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"foo\": \"foo\\nfoo\",\n                \"bar\": \"bar\\nbar\",\n                \"foobar\": \"foobar\\foobar\",\n   
         }\n        }\n    )\n\n    dvc.run(\n        cmd=\"cp foo foo2\",\n        deps=[\"foo\"],\n        name=\"copy-foo-foo2\",\n        wdir=\"dir\",\n        outs_persist=[\"foo2\"],\n        always_changed=True,\n    )\n    data = load_file(dvc, PROJECT_FILE)._load()[0]\n    assert data == {\n        \"stages\": {\n            \"copy-foo-foo2\": {\n                \"cmd\": \"cp foo foo2\",\n                \"deps\": [\"foo\"],\n                \"outs\": [{\"foo2\": {\"persist\": True}}],\n                \"always_changed\": True,\n                \"wdir\": \"dir\",\n            }\n        }\n    }\n\n    run_head(\n        \"foo\",\n        \"bar\",\n        \"foobar\",\n        name=\"head-files\",\n        outs=[\"bar-1\"],\n        outs_persist=[\"foo-1\"],\n        metrics_no_cache=[\"foobar-1\"],\n        wdir=\"dir\",\n    )\n    assert load_file(dvc, PROJECT_FILE)._load()[0] == {\n        \"stages\": {\n            \"head-files\": {\n                \"cmd\": \"python {} foo bar foobar\".format(\n                    (tmp_dir / \"head.py\").resolve()\n                ),\n                \"wdir\": \"dir\",\n                \"deps\": [\"bar\", \"foo\", \"foobar\"],\n                \"outs\": [\"bar-1\", {\"foo-1\": {\"persist\": True}}],\n                \"metrics\": [{\"foobar-1\": {\"cache\": False}}],\n            },\n            **data[\"stages\"],\n        }\n    }\n\n\n@pytest.mark.parametrize(\"char\", [\"@:\", \"#\", \"$\", \":\", \"/\", \"\\\\\", \".\", \";\", \",\"])\ndef test_run_with_invalid_stage_name(run_copy, char):\n    with pytest.raises(InvalidStageName):\n        run_copy(\"foo\", \"bar\", name=f\"copy_name-{char}\")\n\n\ndef test_run_with_name_having_hyphen_underscore(tmp_dir, dvc, run_copy):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo_bar\")\n\n\ndef test_run_already_exists(tmp_dir, dvc, run_copy):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy\")\n    
with pytest.raises(DuplicateStageName):\n        run_copy(\"bar\", \"foobar\", name=\"copy\", force=False)\n    run_copy(\"bar\", \"foobar\", name=\"copy\", force=True)\n\n\nsupported_params = {\n    \"name\": \"Answer\",\n    \"answer\": 42,\n    \"floats\": 42.0,\n    \"lists\": [42, 42.0, \"42\"],\n    \"nested\": {\"nested1\": {\"nested2\": \"42\", \"nested2-2\": 41.99999}},\n}\n\n\ndef test_run_params_default(tmp_dir, dvc):\n    (tmp_dir / \"params.yaml\").dump(supported_params)\n    stage = dvc.run(\n        name=\"read_params\",\n        params=[\"nested.nested1.nested2\"],\n        cmd=\"cat params.yaml\",\n    )\n    assert isinstance(stage.deps[0], ParamsDependency)\n    assert stage.deps[0].params == [\"nested.nested1.nested2\"]\n\n    lockfile = stage.dvcfile._lockfile\n    assert lockfile.load()[\"stages\"][\"read_params\"][\"params\"] == {\n        \"params.yaml\": {\"nested.nested1.nested2\": \"42\"}\n    }\n\n    data, _ = stage.dvcfile._load()\n    assert data[\"stages\"][\"read_params\"][\"params\"] == [\"nested.nested1.nested2\"]\n\n\ndef test_run_params_custom_file(tmp_dir, dvc):\n    (tmp_dir / \"params2.yaml\").dump(supported_params)\n    stage = dvc.run(\n        name=\"read_params\",\n        params=[\"params2.yaml:lists\"],\n        cmd=\"cat params2.yaml\",\n    )\n\n    isinstance(stage.deps[0], ParamsDependency)\n    assert stage.deps[0].params == [\"lists\"]\n    lockfile = stage.dvcfile._lockfile\n    assert lockfile.load()[\"stages\"][\"read_params\"][\"params\"] == {\n        \"params2.yaml\": {\"lists\": [42, 42.0, \"42\"]}\n    }\n\n    data, _ = stage.dvcfile._load()\n    assert data[\"stages\"][\"read_params\"][\"params\"] == [{\"params2.yaml\": [\"lists\"]}]\n\n\ndef test_run_params_no_exec(tmp_dir, dvc):\n    (tmp_dir / \"params2.yaml\").dump(supported_params)\n    stage = dvc.run(\n        name=\"read_params\",\n        params=[\"params2.yaml:lists\"],\n        cmd=\"cat params2.yaml\",\n        no_exec=True,\n    )\n\n    
isinstance(stage.deps[0], ParamsDependency)\n    assert stage.deps[0].params == [\"lists\"]\n    assert not stage.dvcfile._lockfile.exists()\n\n    data, _ = stage.dvcfile._load()\n    assert data[\"stages\"][\"read_params\"][\"params\"] == [{\"params2.yaml\": [\"lists\"]}]\n\n\n@pytest.mark.parametrize(\n    \"kwargs\",\n    [\n        {\"outs\": [\"foo\"], \"deps\": [\"bar\"]},\n        {\"outs\": [\"foo\"], \"deps\": [\"bar\"], \"name\": \"copy-foo-bar\"},\n    ],\n)\ndef test_run_without_cmd(tmp_dir, dvc, kwargs):\n    with pytest.raises(InvalidArgumentError) as exc:\n        dvc.run(**kwargs)\n    assert str(exc.value) == \"command is not specified\"\n\n\ndef test_run_overwrite_order(tmp_dir, dvc, run_copy):\n    tmp_dir.gen({\"foo\": \"foo\", \"foo1\": \"foo1\"})\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    run_copy(\"bar\", \"foobar\", name=\"copy-bar-foobar\")\n\n    run_copy(\"foo1\", \"bar1\", name=\"copy-foo-bar\", force=True)\n\n    data = (tmp_dir / PROJECT_FILE).parse()\n    assert list(data[\"stages\"].keys()) == [\"copy-foo-bar\", \"copy-bar-foobar\"]\n\n\ndef test_run_overwrite_preserves_meta_and_comment(tmp_dir, dvc, run_copy):\n    tmp_dir.gen({\"foo\": \"foo\", \"foo1\": \"foo1\"})\n    text = textwrap.dedent(\n        \"\"\"\\\n        stages:\n          copy-foo-bar:\n            cmd: python copy.py {src} {dest}\n            deps:\n            - copy.py\n            - {src}\n            outs:\n            # comments are preserved\n            - {dest}\n            meta:\n              name: meta is preserved too\n    \"\"\"\n    )\n    (tmp_dir / PROJECT_FILE).write_text(text.format(src=\"foo\", dest=\"bar\"))\n    assert dvc.reproduce(PROJECT_FILE)\n\n    assert run_copy(\"foo1\", \"bar1\", name=\"copy-foo-bar\", force=True)\n\n    assert (tmp_dir / PROJECT_FILE).read_text() == text.format(src=\"foo1\", dest=\"bar1\")\n\n\ndef test_run_external_outputs(tmp_dir, dvc, local_workspace):\n    hash_name = \"md5\"\n    foo_hash = 
\"acbd18db4cc2f85cedef654fccc4a4d8\"\n    bar_hash = \"37b51d194a7513e45b56f6524f2d51f2\"\n\n    local_workspace.gen(\"foo\", \"foo\")\n    dvc.run(\n        name=\"mystage\",\n        cmd=\"mycmd\",\n        deps=[\"remote://workspace/foo\"],\n        outs_no_cache=[\"remote://workspace/bar\"],\n        no_exec=True,\n    )\n\n    dvc_yaml = (\n        \"stages:\\n\"\n        \"  mystage:\\n\"\n        \"    cmd: mycmd\\n\"\n        \"    deps:\\n\"\n        \"    - remote://workspace/foo\\n\"\n        \"    outs:\\n\"\n        \"    - remote://workspace/bar:\\n\"\n        \"        cache: false\\n\"\n    )\n\n    assert (tmp_dir / \"dvc.yaml\").read_text() == dvc_yaml\n    assert not (tmp_dir / \"dvc.lock\").exists()\n\n    local_workspace.gen(\"bar\", \"bar\")\n    dvc.commit(\"dvc.yaml\", force=True)\n\n    assert (tmp_dir / \"dvc.yaml\").read_text() == dvc_yaml\n    assert (tmp_dir / \"dvc.lock\").read_text() == (\n        \"schema: '2.0'\\n\"\n        \"stages:\\n\"\n        \"  mystage:\\n\"\n        \"    cmd: mycmd\\n\"\n        \"    deps:\\n\"\n        \"    - path: remote://workspace/foo\\n\"\n        \"      hash: md5\\n\"\n        f\"      {hash_name}: {foo_hash}\\n\"\n        \"      size: 3\\n\"\n        \"    outs:\\n\"\n        \"    - path: remote://workspace/bar\\n\"\n        \"      hash: md5\\n\"\n        f\"      {hash_name}: {bar_hash}\\n\"\n        \"      size: 3\\n\"\n    )\n\n    assert (local_workspace / \"foo\").read_text() == \"foo\"\n    assert (local_workspace / \"bar\").read_text() == \"bar\"\n    assert not (local_workspace / \"cache\").exists()\n\n\ndef test_rerun_callback(dvc):\n    def run_callback(force=False):\n        return dvc.run(cmd=\"echo content > out\", force=force, name=\"echo\")\n\n    assert run_callback() is not None\n    with pytest.raises(DuplicateStageName):\n        assert run_callback() is not None\n    assert run_callback(force=True) is not None\n\n\ndef test_rerun_changed_dep(tmp_dir, run_copy):\n    
tmp_dir.gen(\"foo\", \"foo content\")\n    assert run_copy(\"foo\", \"out\", name=\"copy\") is not None\n\n    tmp_dir.gen(\"foo\", \"changed content\")\n    with pytest.raises(DuplicateStageName):\n        run_copy(\"foo\", \"out\", force=False, name=\"copy\")\n    assert run_copy(\"foo\", \"out\", force=True, name=\"copy\")\n\n\ndef test_run_remove_outs(tmp_dir, dvc, append_foo_script):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.run(\n        deps=[\"append_foo.py\"],\n        outs=[\"foo\"],\n        cmd=\"python append_foo.py foo\",\n        name=\"append-foo\",\n    )\n\n\n@pytest.mark.parametrize(\"metrics_type\", [\"metrics\", \"metrics_no_cache\"])\ndef test_metrics_dir(tmp_dir, dvc, caplog, run_copy_metrics, metrics_type):\n    copyargs = {metrics_type: [\"dir_metric\"]}\n    tmp_dir.gen({\"dir\": {\"file\": \"content\"}})\n    with caplog.at_level(logging.DEBUG, \"dvc\"):\n        run_copy_metrics(\"dir\", \"dir_metric\", name=\"copy-metrics\", **copyargs)\n    assert \"directory 'dir_metric' cannot be used as metrics.\" in caplog.messages\n\n\ndef test_rerun_deterministic(tmp_dir, run_copy, mocker):\n    from dvc.stage.run import subprocess\n\n    tmp_dir.gen(\"foo\", \"foo content\")\n\n    spy = mocker.spy(subprocess, \"Popen\")\n\n    run_copy(\"foo\", \"out\", name=\"copy\")\n    assert spy.called\n\n    spy.reset_mock()\n    run_copy(\"foo\", \"out\", name=\"copy\")\n    assert not spy.called\n\n\ndef test_rerun_deterministic_ignore_cache(tmp_dir, run_copy, mocker):\n    from dvc.stage.run import subprocess\n\n    tmp_dir.gen(\"foo\", \"foo content\")\n\n    spy = mocker.spy(subprocess, \"Popen\")\n\n    run_copy(\"foo\", \"out\", name=\"copy\")\n    assert spy.called\n\n    spy.reset_mock()\n    run_copy(\"foo\", \"out\", run_cache=False, name=\"copy\")\n    assert spy.called\n\n\ndef test_rerun_changed_stage(tmp_dir, run_copy):\n    tmp_dir.gen(\"foo\", \"foo content\")\n    assert run_copy(\"foo\", \"out\", name=\"copy\") is not None\n\n    
tmp_dir.gen(\"bar\", \"bar content\")\n    with pytest.raises(DuplicateStageName):\n        run_copy(\"bar\", \"out\", force=False, name=\"copy\")\n\n\ndef test_rerun_changed_out(tmp_dir, run_copy):\n    tmp_dir.gen(\"foo\", \"foo content\")\n    assert run_copy(\"foo\", \"out\", name=\"copy\") is not None\n\n    tmp_dir.gen(\"out\", \"modification\")\n    with pytest.raises(DuplicateStageName):\n        run_copy(\"foo\", \"out\", force=False, name=\"copy\")\n\n\ndef test_should_raise_on_overlapping_output_paths(tmp_dir, dvc, append_foo_script):\n    tmp_dir.gen(\"data\", {\"foo\": \"foo\", \"bar\": \"bar\"})\n    ret = main([\"add\", \"data\"])\n    assert ret == 0\n\n    foo_file = os.path.join(\"data\", \"foo\")\n    with pytest.raises(OverlappingOutputPathsError) as err:\n        dvc.run(\n            outs=[\"data/foo\"],\n            cmd=f\"python append_foo.py {foo_file}\",\n            name=\"append-foo\",\n        )\n\n    error_output = str(err.value)\n\n    assert \"The output paths:\\n\" in error_output\n    assert \"\\n'data'('data.dvc')\\n\" in error_output\n    assert f\"\\n'{foo_file}'('append-foo')\\n\" in error_output\n    assert (\n        \"overlap and are thus in the same tracked directory.\\n\"\n        \"To keep reproducibility, outputs should be in separate \"\n        \"tracked directories or tracked individually.\" in error_output\n    )\n\n\ndef test_should_not_checkout_upon_corrupted_local_hardlink_cache(\n    mocker, tmp_dir, dvc, copy_script\n):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.cache.local.cache_types = [\"hardlink\"]\n\n    stage = dvc.run(\n        deps=[\"foo\"],\n        outs=[\"bar\"],\n        cmd=\"python copy.py foo bar\",\n        name=\"copy\",\n    )\n\n    os.chmod(\"bar\", 0o644)\n    with open(\"bar\", \"w\", encoding=\"utf-8\") as fd:\n        fd.write(\"corrupting the output cache\")\n\n    spy_checkout = mocker.spy(stage.outs[0], \"checkout\")\n    from dvc.stage import run as stage_run\n\n    spy_run = 
mocker.spy(stage_run, \"cmd_run\")\n\n    with dvc.lock:\n        stage.run()\n\n        spy_run.assert_called_once()\n        spy_checkout.assert_not_called()\n"
  },
  {
    "path": "tests/func/test_run_cache.py",
    "content": "import logging\nimport os\n\nimport pytest\nfrom funcy import first\n\nfrom dvc.dvcfile import LOCK_FILE\nfrom dvc.stage.cache import RunCacheNotSupported, _get_stage_hash\nfrom dvc.utils.fs import remove\n\n\ndef _recurse_count_files(path):\n    return len([os.path.join(r, f) for r, _, fs in os.walk(path) for f in fs])\n\n\ndef test_push_pull(tmp_dir, dvc, erepo_dir, run_copy, local_remote):\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    assert dvc.push(run_cache=True) == 2\n    erepo_dir.add_remote(config=local_remote.config)\n    with erepo_dir.chdir():\n        assert not os.path.exists(erepo_dir.dvc.stage_cache.cache_dir)\n        assert erepo_dir.dvc.pull(run_cache=True) == {\n            \"added\": [],\n            \"deleted\": [],\n            \"modified\": [],\n            \"stats\": {\"fetched\": 0, \"added\": 0, \"deleted\": 0, \"modified\": 0},\n        }\n        assert os.listdir(erepo_dir.dvc.stage_cache.cache_dir)\n\n\ndef test_restore(tmp_dir, dvc, run_copy, mocker):\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    mock_restore = mocker.spy(dvc.stage_cache, \"restore\")\n    mock_run = mocker.patch(\"dvc.stage.run.cmd_run\")\n\n    # removing any information that `dvc` could use to re-generate from\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / LOCK_FILE).unlink()\n\n    (stage,) = dvc.reproduce(\"copy-foo-bar\")\n\n    mock_restore.assert_called_once_with(stage, dry=False)\n    mock_run.assert_not_called()\n    assert (tmp_dir / \"bar\").exists()\n    assert not (tmp_dir / \"foo\").unlink()\n    assert (tmp_dir / LOCK_FILE).exists()\n\n\ndef test_save(tmp_dir, dvc, run_copy):\n    run_cache_dir = dvc.stage_cache.cache_dir\n    assert not os.path.exists(run_cache_dir)\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    assert _recurse_count_files(run_cache_dir) == 1\n    assert 
dvc.stage_cache._load(stage)\n\n\ndef test_do_not_save_on_no_exec_and_dry(tmp_dir, dvc, run_copy):\n    run_cache_dir = dvc.stage_cache.cache_dir\n    assert not os.path.exists(run_cache_dir)\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\", no_exec=True)\n\n    assert _recurse_count_files(run_cache_dir) == 0\n    assert not dvc.stage_cache._load(stage)\n\n    (stage,) = dvc.reproduce(\"copy-foo-bar\", dry=True)\n\n    assert _recurse_count_files(run_cache_dir) == 0\n    assert not dvc.stage_cache._load(stage)\n\n\n@pytest.mark.parametrize(\n    \"out_type,run_cache\",\n    [\n        (\"metrics_no_cache\", True),\n        (\"plots_no_cache\", True),\n        (\"outs_no_cache\", False),\n    ],\n)\ndef test_outs_no_cache_deactivate_run_cache(tmp_dir, dvc, out_type, run_cache):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.run(\n        deps=[\"foo\"],\n        cmd=\"cp foo bar && cp foo goo\",\n        outs=[\"goo\"],\n        name=\"copy-foo-bar\",\n        **{out_type: [\"bar\"]},\n    )\n    assert os.path.isdir(dvc.stage_cache.cache_dir) == run_cache\n\n\ndef test_memory_for_multiple_runs_of_same_stage(tmp_dir, dvc, run_copy, mocker):\n    tmp_dir.gen(\"foo\", \"foo\")\n    assert not os.path.exists(dvc.stage_cache.cache_dir)\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    assert _recurse_count_files(dvc.stage_cache.cache_dir) == 1\n    tmp_dir.gen(\"foo\", \"foobar\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    assert _recurse_count_files(dvc.stage_cache.cache_dir) == 2\n\n    from dvc.stage import run as _run\n\n    mock_restore = mocker.spy(dvc.stage_cache, \"restore\")\n    mocker.spy(dvc.stage_cache, \"_load_cache\")\n    mock_run = mocker.spy(_run, \"cmd_run\")\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / LOCK_FILE).unlink()\n    (stage,) = dvc.reproduce(\"copy-foo-bar\")\n\n    assert (tmp_dir / LOCK_FILE).exists()\n    assert (tmp_dir / \"bar\").read_text() == 
\"foobar\"\n    mock_run.assert_not_called()\n    mock_restore.assert_called_once_with(stage, dry=False)\n    mock_restore.reset_mock()\n\n    (tmp_dir / LOCK_FILE).unlink()\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.reproduce(\"copy-foo-bar\")\n\n    assert (tmp_dir / \"bar\").read_text() == \"foo\"\n    mock_run.assert_not_called()\n    mock_restore.assert_called_once_with(stage, dry=False)\n    assert (tmp_dir / \"bar\").exists()\n    assert not (tmp_dir / \"foo\").unlink()\n    assert (tmp_dir / LOCK_FILE).exists()\n\n\ndef test_newest_entry_is_loaded_for_non_deterministic_stage(tmp_dir, dvc, mocker):\n    tmp_dir.gen(\"foo\", \"foo\")\n    assert not os.path.exists(dvc.stage_cache.cache_dir)\n\n    dvc.stage.add(\n        name=\"non-deterministic\",\n        cmd='python -c \"from time import time; print(time())\" > bar',\n        deps=[\"foo\"],\n        outs=[\"bar\"],\n    )\n\n    for i in range(4):\n        (stage,) = dvc.reproduce(\"non-deterministic\", force=True)\n        assert _recurse_count_files(dvc.stage_cache.cache_dir) == i + 1\n\n    key = _get_stage_hash(stage)\n    cache_dir = dvc.stage_cache._get_cache_dir(key)\n    old_entries = os.listdir(cache_dir)\n\n    (stage,) = dvc.reproduce(\"non-deterministic\", force=True)\n    newest_output = (tmp_dir / \"bar\").read_text()\n    newest_entry = first(e for e in os.listdir(cache_dir) if e not in old_entries)\n\n    from dvc.stage import run as _run\n\n    mock_restore = mocker.spy(dvc.stage_cache, \"restore\")\n    mock_load = mocker.spy(dvc.stage_cache, \"_load_cache\")\n    mock_run = mocker.spy(_run, \"cmd_run\")\n\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / LOCK_FILE).unlink()\n    (stage,) = dvc.reproduce(\"non-deterministic\")\n\n    assert (tmp_dir / LOCK_FILE).exists()\n    assert (tmp_dir / \"bar\").read_text() == newest_output\n    mock_run.assert_not_called()\n    mock_restore.assert_called_once_with(stage, dry=False)\n    mock_load.assert_called_with(key, newest_entry)\n\n\ndef 
test_memory_runs_of_multiple_stages(tmp_dir, dvc, run_copy, mocker):\n    tmp_dir.gen(\"foo\", \"foo\")\n    assert not os.path.exists(dvc.stage_cache.cache_dir)\n\n    run_copy(\"foo\", \"foo.bak\", name=\"backup-foo\")\n    assert _recurse_count_files(dvc.stage_cache.cache_dir) == 1\n\n    tmp_dir.gen(\"bar\", \"bar\")\n    run_copy(\"bar\", \"bar.bak\", name=\"backup-bar\")\n    assert _recurse_count_files(dvc.stage_cache.cache_dir) == 2\n\n    from dvc.stage import run as _run\n\n    mock_restore = mocker.spy(dvc.stage_cache, \"restore\")\n    mock_run = mocker.spy(_run, \"cmd_run\")\n\n    (tmp_dir / \"foo.bak\").unlink()\n    (tmp_dir / \"bar.bak\").unlink()\n    (tmp_dir / LOCK_FILE).unlink()\n    (stage,) = dvc.reproduce(\"backup-foo\")\n\n    assert (tmp_dir / \"foo.bak\").read_text() == \"foo\"\n    assert (tmp_dir / LOCK_FILE).exists()\n    mock_run.assert_not_called()\n    mock_restore.assert_called_once_with(stage, dry=False)\n    mock_restore.reset_mock()\n\n    (stage,) = dvc.reproduce(\"backup-bar\")\n\n    assert (tmp_dir / \"bar.bak\").read_text() == \"bar\"\n    assert (tmp_dir / LOCK_FILE).exists()\n    mock_run.assert_not_called()\n    mock_restore.assert_called_once_with(stage, dry=False)\n\n\ndef test_restore_pull(tmp_dir, dvc, run_copy, mocker, local_remote):\n    import dvc.output as dvc_output\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    dvc.push(run_cache=True)\n\n    mock_restore = mocker.spy(dvc.stage_cache, \"restore\")\n    mock_run = mocker.patch(\"dvc.stage.run.cmd_run\")\n    mock_checkout = mocker.spy(dvc_output, \"checkout\")\n\n    # removing any information that `dvc` could use to re-generate from\n    (tmp_dir / \"bar\").unlink()\n    (tmp_dir / LOCK_FILE).unlink()\n    remove(stage.outs[0].cache_path)\n\n    # removing local run cache\n    remove(dvc.stage_cache.cache_dir)\n\n    (stage,) = dvc.reproduce(\"copy-foo-bar\", pull=True)\n\n    
mock_restore.assert_called_once_with(stage, pull=True, dry=False)\n    mock_run.assert_not_called()\n    assert mock_checkout.call_count == 2\n    assert (tmp_dir / \"bar\").exists()\n    assert not (tmp_dir / \"foo\").unlink()\n    assert (tmp_dir / LOCK_FILE).exists()\n\n\ndef test_push_pull_unsupported(tmp_dir, dvc, mocker, run_copy, local_remote, caplog):\n    tmp_dir.gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    mocker.patch.object(\n        dvc.cloud, \"get_remote_odb\", side_effect=RunCacheNotSupported(\"foo\")\n    )\n    with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n        dvc.push(run_cache=True)\n        assert \"failed to push run cache\" in caplog.text\n    with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n        dvc.pull(run_cache=True)\n        assert \"failed to pull run cache\" in caplog.text\n"
  },
  {
    "path": "tests/func/test_scm.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.scm import SCM, Git, NoSCM, SCMError, lfs_prefetch\n\n\ndef test_init_none(tmp_dir):\n    assert isinstance(SCM(os.fspath(tmp_dir), no_scm=True), NoSCM)\n\n\ndef test_init_git(tmp_dir):\n    Git.init(os.fspath(tmp_dir))\n    assert isinstance(SCM(os.fspath(tmp_dir)), Git)\n\n\ndef test_init_no_git(tmp_dir):\n    with pytest.raises(SCMError, match=r\".* is not a git repository\"):\n        SCM(os.fspath(tmp_dir))\n\n\ndef test_init_sub_dir(tmp_dir):\n    Git.init(os.fspath(tmp_dir))\n    subdir = tmp_dir / \"dir\"\n    subdir.mkdir()\n\n    scm = SCM(os.fspath(subdir))\n    assert scm.root_dir == os.fspath(tmp_dir)\n\n\ndef test_lfs_prefetch(tmp_dir, dvc, scm, mocker):\n    mock_fetch = mocker.patch(\"scmrepo.git.lfs.fetch\")\n    rev = scm.get_rev()\n\n    with dvc.switch(rev):\n        lfs_prefetch(dvc.dvcfs, [\"foo\"])\n        mock_fetch.assert_not_called()\n\n    tmp_dir.scm_gen(\n        \".gitattributes\", \".lfs filter=lfs diff=lfs merge=lfs -text\", commit=\"init lfs\"\n    )\n    rev = scm.get_rev()\n    with dvc.switch(rev):\n        lfs_prefetch(dvc.dvcfs, [\"foo\"])\n        mock_fetch.assert_called_once()\n"
  },
  {
    "path": "tests/func/test_scm_context.py",
    "content": "def test_scm_context_autostage(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    with dvc.scm_context(autostage=True) as context:\n        context.track_file(\"foo\")\n\n    scm._reset()\n    assert scm.is_tracked(\"foo\")\n\n\ndef test_scm_context_ignore(tmp_dir, scm, dvc):\n    with dvc.scm_context as context:\n        context.ignore(tmp_dir / \"foo\")\n        assert context.files_to_track == {scm.GITIGNORE}\n\n    scm._reset()\n    assert scm.is_ignored(\"foo\")\n\n\ndef test_scm_context_when_already_ignored(tmp_dir, scm, dvc):\n    scm.ignore(tmp_dir / \"foo\")\n    scm._reset()\n\n    with dvc.scm_context() as context:\n        context.ignore(tmp_dir / \"foo\")\n        # If files are already ignored, dvc should not try to track a new\n        # .gitignore file as it's a no-op.\n        assert not context.files_to_track\n\n    scm._reset()\n    assert scm.is_ignored(\"foo\")\n\n\ndef test_scm_context_ignore_remove(tmp_dir, scm, dvc):\n    scm.ignore(tmp_dir / \"foo\")\n    scm.ignore(tmp_dir / \"bar\")\n\n    with dvc.scm_context:\n        dvc.scm_context.ignore_remove(tmp_dir / \"foo\")\n        assert dvc.scm_context.files_to_track == {scm.GITIGNORE}\n\n    scm._reset()\n    assert not scm.is_ignored(\"foo\")\n\n\ndef test_scm_context_try_ignore_remove_non_existing_entry(tmp_dir, dvc, scm):\n    with dvc.scm_context as context:\n        context.ignore_remove(tmp_dir / \"foo\")\n        assert not context.files_to_track\n    scm._reset()\n    assert not scm.is_ignored(\"foo\")\n\n\ndef test_scm_context_no_track_on_ignore_remove(tmp_dir, dvc, scm):\n    # DVC should not keep track of file when nothing actually changed\n    # i.e. here ignore was reverted back.\n    scm.ignore(tmp_dir / \"foo\")\n    with dvc.scm_context:\n        dvc.scm_context.ignore_remove(tmp_dir / \"foo\")\n        assert not dvc.scm_context.files_to_track\n\n    scm._reset()\n    assert not scm.is_ignored(\"foo\")\n"
  },
  {
    "path": "tests/func/test_stage.py",
    "content": "import os\nimport re\n\nimport pytest\n\nfrom dvc.annotations import Annotation\nfrom dvc.dvcfile import SingleStageFile\nfrom dvc.exceptions import OutputDuplicationError\nfrom dvc.fs import LocalFileSystem\nfrom dvc.output import Output\nfrom dvc.repo import Repo\nfrom dvc.stage import PipelineStage, Stage\nfrom dvc.stage.utils import compute_md5\nfrom dvc.utils import dict_md5\nfrom dvc.utils.serialize import dump_yaml, load_yaml\nfrom dvc.utils.strictyaml import YAMLValidationError\n\n\ndef test_cmd_obj():\n    with pytest.raises(YAMLValidationError):\n        SingleStageFile.validate({Stage.PARAM_CMD: {}})\n\n\ndef test_no_cmd():\n    SingleStageFile.validate({})\n\n\ndef test_object():\n    with pytest.raises(YAMLValidationError):\n        SingleStageFile.validate({Stage.PARAM_DEPS: {}})\n\n    with pytest.raises(YAMLValidationError):\n        SingleStageFile.validate({Stage.PARAM_OUTS: {}})\n\n\ndef test_none():\n    SingleStageFile.validate({Stage.PARAM_DEPS: None})\n    SingleStageFile.validate({Stage.PARAM_OUTS: None})\n\n\ndef test_empty_list():\n    d = {Stage.PARAM_DEPS: []}\n    SingleStageFile.validate(d)\n\n    d = {Stage.PARAM_OUTS: []}\n    SingleStageFile.validate(d)\n\n\ndef test_list():\n    lst = [\n        {Output.PARAM_PATH: \"foo\", LocalFileSystem.PARAM_CHECKSUM: \"123\"},\n        {Output.PARAM_PATH: \"bar\", LocalFileSystem.PARAM_CHECKSUM: None},\n        {Output.PARAM_PATH: \"baz\"},\n    ]\n    d = {Stage.PARAM_DEPS: lst}\n    SingleStageFile.validate(d)\n\n    lst[0][Output.PARAM_CACHE] = True\n    lst[1][Output.PARAM_CACHE] = False\n    d = {Stage.PARAM_OUTS: lst}\n    SingleStageFile.validate(d)\n\n\ndef test_reload(tmp_dir, dvc):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    d = load_yaml(stage.relpath)\n\n    # NOTE: checking that reloaded stage didn't change its checksum\n    md5 = \"11111111111111111111111111111111\"\n    d[stage.PARAM_MD5] = md5\n    dump_yaml(stage.relpath, d)\n\n    dvcfile = 
SingleStageFile(dvc, stage.relpath)\n    stage = dvcfile.stage\n\n    assert stage is not None\n    dvcfile.dump(stage)\n\n    d = load_yaml(stage.relpath)\n    assert d[stage.PARAM_MD5] == md5\n\n\ndef test_default_wdir_ignored_in_checksum(tmp_dir, dvc):\n    tmp_dir.gen(\"bar\", \"bar\")\n    stage = dvc.run(cmd=\"cp bar foo\", deps=[\"bar\"], outs=[\"foo\"], name=\"copy-foo-bar\")\n\n    d = stage.dumpd()\n    assert Stage.PARAM_WDIR not in d\n\n    d = load_yaml(\"dvc.yaml\")\n    assert Stage.PARAM_WDIR not in d[\"stages\"][\"copy-foo-bar\"]\n\n    with dvc.lock:\n        stage = stage.reload()\n        assert not stage.changed()\n\n\ndef test_external_remote_output_resolution(tmp_dir, dvc, make_remote):\n    tmp_path = make_remote(\"tmp\", default=False)\n    tmp_dir.add_remote(url=\"remote://tmp/storage\", name=\"storage\", default=False)\n    storage = tmp_path / \"storage\"\n    storage.mkdir()\n    file_path = storage / \"file\"\n\n    dvc.run(\n        cmd=f\"echo file > {file_path}\",\n        outs_no_cache=[\"remote://storage/file\"],\n        name=\"gen-file\",\n    )\n    assert os.path.exists(file_path)\n\n\ndef test_external_remote_dependency_resolution(tmp_dir, dvc, make_remote):\n    tmp_path = make_remote(\"tmp\", default=False)\n    tmp_dir.add_remote(url=\"remote://tmp/storage\", name=\"storage\", default=False)\n    storage = tmp_path / \"storage\"\n    storage.mkdir()\n    file_path = storage / \"file\"\n    file_path.write_text(\"Isle of Dogs\", encoding=\"utf-8\")\n\n    dvc.imp_url(\"remote://storage/file\", \"movie.txt\")\n    assert (tmp_dir / \"movie.txt\").read_text() == \"Isle of Dogs\"\n\n\ndef test_md5_ignores_comments(tmp_dir, dvc):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo content\")\n\n    with open(stage.path, \"a\", encoding=\"utf-8\") as f:\n        f.write(\"# End comment\\n\")\n\n    new_stage = SingleStageFile(dvc, stage.path).stage\n    assert not new_stage.changed_stage()\n\n\ndef 
test_md5_ignores_annotations(tmp_dir, dvc):\n    data = {\n        \"desc\": \"stage desc\",\n        \"meta\": {\"key1\": \"value1\", \"key2\": \"value2\"},\n        \"outs\": [\n            {\n                \"md5\": \"d3b07384d113edec49eaa6238ad5ff00\",\n                \"size\": 4,\n                \"hash\": \"md5\",\n                \"path\": \"foo\",\n                \"desc\": \"foo desc\",\n                \"type\": \"mytype\",\n                \"labels\": [\"get-started\", \"dataset-registry\"],\n                \"meta\": {\"key1\": \"value1\"},\n            }\n        ],\n    }\n    (tmp_dir / \"foo.dvc\").dump(data)\n    stage = dvc.stage.load_one(\"foo.dvc\")\n    assert compute_md5(stage) == \"cde267b60ef5a00e9a35cc1999ab83a3\"\n    assert (\n        dict_md5(\n            {\n                \"outs\": [\n                    {\n                        \"md5\": \"d3b07384d113edec49eaa6238ad5ff00\",\n                        \"hash\": \"md5\",\n                        \"path\": \"foo\",\n                    }\n                ]\n            }\n        )\n        == \"cde267b60ef5a00e9a35cc1999ab83a3\"\n    )\n\n\ndef test_meta_desc_is_preserved(tmp_dir, dvc):\n    data = {\n        \"desc\": \"stage desc\",\n        \"meta\": {\"key1\": \"value1\", \"key2\": \"value2\"},\n        \"outs\": [\n            {\n                \"md5\": \"d3b07384d113edec49eaa6238ad5ff00\",\n                \"size\": 4,\n                \"hash\": \"md5\",\n                \"path\": \"foo\",\n                \"desc\": \"foo desc\",\n                \"type\": \"mytype\",\n                \"labels\": [\"get-started\", \"dataset-registry\"],\n                \"meta\": {\"key\": \"value\"},\n            }\n        ],\n    }\n    (tmp_dir / \"foo.dvc\").dump(data)\n    stage = dvc.stage.load_one(\"foo.dvc\")\n\n    assert stage.meta == {\"key1\": \"value1\", \"key2\": \"value2\"}\n    assert stage.desc == \"stage desc\"\n    assert stage.outs[0].annot == Annotation(\n        
desc=\"foo desc\",\n        type=\"mytype\",\n        labels=[\"get-started\", \"dataset-registry\"],\n        meta={\"key\": \"value\"},\n    )\n\n    # sanity check\n    stage.dump()\n    assert (tmp_dir / \"foo.dvc\").parse() == data\n\n\ndef test_parent_repo_collect_stages(tmp_dir, scm, dvc):\n    tmp_dir.gen({\"subdir\": {}})\n    tmp_dir.gen({\"deep\": {\"dir\": {}}})\n    subrepo_dir = tmp_dir / \"subdir\"\n    deep_subrepo_dir = tmp_dir / \"deep\" / \"dir\"\n\n    with subrepo_dir.chdir():\n        subrepo = Repo.init(subdir=True)\n        subrepo_dir.gen(\"subrepo_file\", \"subrepo file content\")\n        subrepo.add(\"subrepo_file\")\n\n    with deep_subrepo_dir.chdir():\n        deep_subrepo = Repo.init(subdir=True)\n        deep_subrepo_dir.gen(\"subrepo_file\", \"subrepo file content\")\n        deep_subrepo.add(\"subrepo_file\")\n\n    dvc._reset()\n\n    stages = dvc.stage.collect(None)\n    subrepo_stages = subrepo.stage.collect(None)\n    deep_subrepo_stages = deep_subrepo.stage.collect(None)\n\n    assert stages == []\n    assert subrepo_stages != []\n    assert deep_subrepo_stages != []\n\n\n@pytest.mark.parametrize(\"with_deps\", [False, True])\ndef test_collect_symlink(tmp_dir, dvc, with_deps):\n    from dvc.exceptions import StageNotFoundError\n\n    tmp_dir.gen({\"data\": {\"foo\": \"foo contents\"}})\n    foo_path = os.path.join(\"data\", \"foo\")\n    dvc.add(foo_path)\n\n    data_link = tmp_dir / \"data_link\"\n    data_link.symlink_to(\"data\")\n\n    if with_deps:\n        # NOTE: with_deps means that we'll need to collect and use dvcfiles in the repo\n        # and we currently don't follow symlinks when collecting those, so it will not\n        # be able to find the target stage.\n        with pytest.raises(StageNotFoundError):\n            dvc.stage.collect(target=str(data_link / \"foo.dvc\"), with_deps=with_deps)\n    else:\n        stage = next(\n            iter(\n                dvc.stage.collect(\n                    
target=str(data_link / \"foo.dvc\"), with_deps=with_deps\n                )\n            )\n        )\n\n        assert stage.addressing == os.path.join(\"data_link\", \"foo.dvc\")\n\n    stage = next(iter(dvc.stage.collect(target=f\"{foo_path}.dvc\", with_deps=with_deps)))\n\n    assert stage.addressing == os.path.join(\"data\", \"foo.dvc\")\n\n\ndef test_stage_strings_representation(tmp_dir, dvc, run_copy):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert stage1.addressing == \"foo.dvc\"\n    assert repr(stage1) == \"Stage: 'foo.dvc'\"\n    assert str(stage1) == \"stage: 'foo.dvc'\"\n\n    stage2 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    assert stage2.addressing == \"copy-foo-bar\"\n    assert repr(stage2) == \"Stage: 'copy-foo-bar'\"\n    assert str(stage2) == \"stage: 'copy-foo-bar'\"\n\n    folder = tmp_dir / \"dir\"\n    folder.mkdir()\n    with folder.chdir():\n        # `Stage` caches `relpath` results, forcing it to reset\n        stage1.path = stage1.path\n        stage2.path = stage2.path\n\n        rel_path = os.path.relpath(stage1.path)\n        assert stage1.addressing == rel_path\n        assert repr(stage1) == f\"Stage: '{rel_path}'\"\n        assert str(stage1) == f\"stage: '{rel_path}'\"\n\n        rel_path = os.path.relpath(stage2.path)\n        assert stage2.addressing == f\"{rel_path}:{stage2.name}\"\n        assert repr(stage2) == f\"Stage: '{rel_path}:{stage2.name}'\"\n        assert str(stage2) == f\"stage: '{rel_path}:{stage2.name}'\"\n\n\ndef test_stage_on_no_path_string_repr(tmp_dir, dvc):\n    s = Stage(dvc)\n    assert s.addressing == \"No path\"\n    assert repr(s) == \"Stage: 'No path'\"\n    assert str(s) == \"stage: 'No path'\"\n\n    p = PipelineStage(dvc, name=\"stage_name\")\n    assert p.addressing == \"No path:stage_name\"\n    assert repr(p) == \"Stage: 'No path:stage_name'\"\n    assert str(p) == \"stage: 'No path:stage_name'\"\n\n\ndef test_stage_remove_pipeline_stage(tmp_dir, dvc, run_copy):\n   
 tmp_dir.gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    run_copy(\"bar\", \"foobar\", name=\"copy-bar-foobar\")\n\n    dvc_file = stage.dvcfile\n    with dvc.lock:\n        stage.remove(purge=False)\n    assert stage.name in dvc_file.stages\n\n    with dvc.lock:\n        stage.remove()\n\n    dvc_file._reset()\n    assert stage.name not in dvc_file.stages\n    assert \"copy-bar-foobar\" in dvc_file.stages\n\n\ndef test_stage_remove_pointer_stage(tmp_dir, dvc, run_copy):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    with dvc.lock:\n        stage.remove(purge=False)\n    assert not (tmp_dir / \"foo\").exists()\n    assert (tmp_dir / stage.relpath).exists()\n\n    with dvc.lock:\n        stage.remove()\n    assert not (tmp_dir / stage.relpath).exists()\n\n\ndef test_stage_add_duplicated_output(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    dvc.add(\"foo\")\n\n    with pytest.raises(\n        OutputDuplicationError,\n        match=re.escape(\n            \"Use `dvc remove foo.dvc` to stop tracking the overlapping output.\"\n        ),\n    ):\n        dvc.stage.add(name=\"duplicated\", cmd=\"echo bar > foo\", outs=[\"foo\"])\n"
  },
  {
    "path": "tests/func/test_stage_load.py",
    "content": "import os\nfrom operator import itemgetter\n\nimport pytest\nfrom funcy import raiser\n\nfrom dvc.dvcfile import PROJECT_FILE, FileIsGitIgnored\nfrom dvc.exceptions import NoOutputOrStageError\nfrom dvc.repo import Repo\nfrom dvc.stage.exceptions import (\n    StageFileDoesNotExistError,\n    StageNameUnspecified,\n    StageNotFound,\n)\nfrom dvc.utils import relpath\nfrom dvc.utils.fs import remove\nfrom dvc.utils.strictyaml import YAMLValidationError\n\n\ndef test_collect(tmp_dir, scm, dvc, run_copy):\n    def collect_outs(*args, **kwargs):\n        return {\n            str(out)\n            for stage in dvc.stage.collect(*args, **kwargs)\n            for out in stage.outs\n        }\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    scm.add([\".gitignore\", \"foo.dvc\", \"dvc.yaml\", \"dvc.lock\"])\n    scm.commit(\"Add foo and bar\")\n\n    scm.checkout(\"new-branch\", create_new=True)\n\n    run_copy(\"bar\", \"buzz\", name=\"copy-bar-buzz\")\n    scm.add([\".gitignore\", \"dvc.yaml\", \"dvc.lock\"])\n    scm.commit(\"Add buzz\")\n\n    assert collect_outs(\"copy-foo-bar\", with_deps=True) == {\"foo\", \"bar\"}\n    assert collect_outs(\"copy-bar-buzz\", with_deps=True) == {\"foo\", \"bar\", \"buzz\"}\n    assert collect_outs(\"copy-bar-buzz\", with_deps=False) == {\"buzz\"}\n\n    run_copy(\"foo\", \"foobar\", name=\"copy-foo-foobar\")\n    assert collect_outs(\":copy-foo-foobar\") == {\"foobar\"}\n    assert collect_outs(\":copy-foo-foobar\", with_deps=True) == {\"foobar\", \"foo\"}\n    assert collect_outs(\"dvc.yaml:copy-foo-foobar\", recursive=True) == {\"foobar\"}\n    assert collect_outs(\"copy-foo-foobar\") == {\"foobar\"}\n    assert collect_outs(\"copy-foo-foobar\", with_deps=True) == {\"foobar\", \"foo\"}\n    assert collect_outs(\"copy-foo-foobar\", recursive=True) == {\"foobar\"}\n\n    run_copy(\"foobar\", \"baz\", name=\"copy-foobar-baz\")\n    assert collect_outs(\"dvc.yaml\") 
== {\"foobar\", \"baz\", \"bar\", \"buzz\"}\n    assert collect_outs(\"dvc.yaml\", with_deps=True) == {\n        \"foobar\",\n        \"baz\",\n        \"bar\",\n        \"buzz\",\n        \"foo\",\n    }\n\n\ndef test_collect_dir_recursive(tmp_dir, dvc, run_head):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\"}})\n    (stage1,) = dvc.add(\"dir/*\", glob=True)\n    with (tmp_dir / \"dir\").chdir():\n        stage2 = run_head(\"foo\", name=\"head-foo\")\n        stage3 = run_head(\"foo-1\", name=\"head-foo1\")\n    assert set(dvc.stage.collect(\"dir\", recursive=True)) == {stage1, stage2, stage3}\n\n\ndef test_collect_with_not_existing_output_or_stage_name(tmp_dir, dvc, run_copy):\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.collect(\"some_file\")\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    with pytest.raises(StageNotFound):\n        dvc.stage.collect(\"some_file\")\n\n\ndef test_stages(tmp_dir, dvc):\n    def collect_stages():\n        return {stage.relpath for stage in Repo(os.fspath(tmp_dir)).index.stages}\n\n    tmp_dir.dvc_gen({\"file\": \"a\", \"dir/file\": \"b\", \"dir/subdir/file\": \"c\"})\n\n    assert collect_stages() == {\n        \"file.dvc\",\n        os.path.join(\"dir\", \"file.dvc\"),\n        os.path.join(\"dir\", \"subdir\", \"file.dvc\"),\n    }\n\n    tmp_dir.gen(\".dvcignore\", \"dir\")\n\n    assert collect_stages() == {\"file.dvc\"}\n\n\n@pytest.fixture\ndef stages(tmp_dir, run_copy):\n    stage1, stage2 = tmp_dir.dvc_gen({\"foo\": \"foo\", \"lorem\": \"lorem\"})\n    return {\n        \"foo-generate\": stage1,\n        \"lorem-generate\": stage2,\n        \"copy-foo-bar\": run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\"),\n        \"copy-bar-foobar\": run_copy(\"bar\", \"foobar\", name=\"copy-bar-foobar\"),\n        \"copy-lorem-ipsum\": run_copy(\"lorem\", \"ipsum\", name=\"copy-lorem-ipsum\"),\n    }\n\n\ndef 
test_collect_not_a_group_stage_with_group_flag(tmp_dir, dvc, stages):\n    assert set(dvc.stage.collect(\"copy-bar-foobar\")) == {stages[\"copy-bar-foobar\"]}\n    assert set(dvc.stage.collect(\"copy-bar-foobar\", with_deps=True)) == {\n        stages[\"copy-bar-foobar\"],\n        stages[\"copy-foo-bar\"],\n        stages[\"foo-generate\"],\n    }\n    assert set(dvc.stage.collect_granular(\"copy-bar-foobar\")) == {\n        (stages[\"copy-bar-foobar\"], None)\n    }\n    assert set(dvc.stage.collect_granular(\"copy-bar-foobar\", with_deps=True)) == {\n        (stages[\"copy-bar-foobar\"], None),\n        (stages[\"copy-foo-bar\"], None),\n        (stages[\"foo-generate\"], None),\n    }\n\n\ndef test_collect_generated(tmp_dir, dvc):\n    d = {\n        \"vars\": [{\"vars\": [1, 2, 3, 4, 5]}],\n        \"stages\": {\"build\": {\"foreach\": \"${vars}\", \"do\": {\"cmd\": \"echo ${item}\"}}},\n    }\n    (tmp_dir / \"dvc.yaml\").dump(d)\n\n    all_stages = set(dvc.index.stages)\n    assert len(all_stages) == 5\n\n    assert set(dvc.stage.collect()) == all_stages\n    assert set(dvc.stage.collect(\"build\")) == all_stages\n    assert set(dvc.stage.collect(\"build\", with_deps=True)) == all_stages\n    assert set(dvc.stage.collect(\"build*\", glob=True)) == all_stages\n    assert set(dvc.stage.collect(\"build*\", glob=True, with_deps=True)) == all_stages\n\n    stages_info = {(stage, None) for stage in all_stages}\n    assert set(dvc.stage.collect_granular(\"build\")) == stages_info\n    assert set(dvc.stage.collect_granular(\"build\", with_deps=True)) == stages_info\n\n\ndef test_collect_glob(tmp_dir, dvc, stages):\n    assert set(dvc.stage.collect(\"copy*\", glob=True)) == {\n        stages[key] for key in [\"copy-bar-foobar\", \"copy-foo-bar\", \"copy-lorem-ipsum\"]\n    }\n    assert set(dvc.stage.collect(\"copy-lorem*\", glob=True, with_deps=True)) == {\n        stages[key] for key in [\"copy-lorem-ipsum\", \"lorem-generate\"]\n    }\n\n\ndef 
test_collect_granular_with_no_target(tmp_dir, dvc, stages):\n    assert set(map(itemgetter(0), dvc.stage.collect_granular())) == set(stages.values())\n    assert list(map(itemgetter(1), dvc.stage.collect_granular())) == [None] * len(\n        stages\n    )\n\n\ndef test_collect_granular_with_target(tmp_dir, dvc, stages):\n    assert dvc.stage.collect_granular(\"foo.dvc\") == [(stages[\"foo-generate\"], None)]\n    assert dvc.stage.collect_granular(PROJECT_FILE) == [\n        (stages[\"copy-foo-bar\"], None),\n        (stages[\"copy-bar-foobar\"], None),\n        (stages[\"copy-lorem-ipsum\"], None),\n    ]\n    assert dvc.stage.collect_granular(\":\") == [\n        (stages[\"copy-foo-bar\"], None),\n        (stages[\"copy-bar-foobar\"], None),\n        (stages[\"copy-lorem-ipsum\"], None),\n    ]\n    assert dvc.stage.collect_granular(\"copy-bar-foobar\") == [\n        (stages[\"copy-bar-foobar\"], None)\n    ]\n    assert dvc.stage.collect_granular(\":copy-bar-foobar\") == [\n        (stages[\"copy-bar-foobar\"], None)\n    ]\n    assert dvc.stage.collect_granular(\"dvc.yaml:copy-bar-foobar\") == [\n        (stages[\"copy-bar-foobar\"], None)\n    ]\n\n    with (tmp_dir / dvc.DVC_DIR).chdir():\n        assert dvc.stage.collect_granular(\n            relpath(tmp_dir / PROJECT_FILE) + \":copy-bar-foobar\"\n        ) == [(stages[\"copy-bar-foobar\"], None)]\n\n    assert dvc.stage.collect_granular(\"foobar\") == [\n        (stages[\"copy-bar-foobar\"], os.path.join(tmp_dir, \"foobar\"))\n    ]\n\n\n@pytest.mark.parametrize(\n    \"target\",\n    [\n        \"not_existing.dvc\",\n        \"not_existing.dvc:stage_name\",\n        \"not_existing/dvc.yaml\",\n        \"not_existing/dvc.yaml:stage_name\",\n    ],\n)\ndef test_collect_with_not_existing_dvcfile(tmp_dir, dvc, target):\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.collect_granular(target)\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.collect(target)\n\n\ndef 
test_collect_granular_with_not_existing_output_or_stage_name(tmp_dir, dvc):\n    with pytest.raises(NoOutputOrStageError):\n        dvc.stage.collect_granular(\"some_file\")\n    with pytest.raises(NoOutputOrStageError):\n        dvc.stage.collect_granular(\"some_file\", recursive=True)\n\n\ndef test_collect_granular_with_deps(tmp_dir, dvc, stages):\n    assert set(\n        map(\n            itemgetter(0),\n            dvc.stage.collect_granular(\"copy-foo-bar\", with_deps=True),\n        )\n    ) == {stages[\"copy-foo-bar\"], stages[\"foo-generate\"]}\n    assert set(\n        map(\n            itemgetter(0),\n            dvc.stage.collect_granular(\"copy-bar-foobar\", with_deps=True),\n        )\n    ) == {\n        stages[\"copy-bar-foobar\"],\n        stages[\"copy-foo-bar\"],\n        stages[\"foo-generate\"],\n    }\n    assert set(\n        map(\n            itemgetter(0),\n            dvc.stage.collect_granular(PROJECT_FILE, with_deps=True),\n        )\n    ) == set(stages.values())\n\n\ndef test_collect_granular_same_output_name_stage_name(tmp_dir, dvc, run_copy):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    (stage2,) = tmp_dir.dvc_gen(\"copy-foo-bar\", \"copy-foo-bar\")\n    stage3 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    assert dvc.stage.collect_granular(\"copy-foo-bar\") == [(stage3, None)]\n\n    coll = dvc.stage.collect_granular(\"copy-foo-bar\", with_deps=True)\n    assert set(map(itemgetter(0), coll)) == {stage3, stage1}\n    assert list(map(itemgetter(1), coll)) == [None] * 2\n\n    assert dvc.stage.collect_granular(\"./copy-foo-bar\") == [\n        (stage2, os.path.join(tmp_dir / \"copy-foo-bar\"))\n    ]\n    assert dvc.stage.collect_granular(\"./copy-foo-bar\", with_deps=True) == [\n        (stage2, os.path.join(tmp_dir / \"copy-foo-bar\"))\n    ]\n\n\ndef test_collect_granular_priority_on_collision(tmp_dir, dvc, run_copy):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\"}, \"foo\": \"foo\"})\n    (stage1,) = 
dvc.add(\"dir/*\", glob=True)\n    stage2 = run_copy(\"foo\", \"bar\", name=\"dir\")\n\n    assert dvc.stage.collect_granular(\"dir\") == [(stage2, None)]\n    assert dvc.stage.collect_granular(\"dir\", recursive=True) == [(stage1, None)]\n\n    remove(tmp_dir / \"dir\")\n\n    assert dvc.stage.collect_granular(\"dir\") == [(stage2, None)]\n    assert dvc.stage.collect_granular(\"dir\", recursive=True) == [(stage2, None)]\n\n\ndef test_collect_granular_collision_output_dir_stage_name(tmp_dir, dvc, run_copy):\n    stage1, *_ = tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\"}, \"foo\": \"foo\"})\n    stage3 = run_copy(\"foo\", \"bar\", name=\"dir\")\n\n    assert dvc.stage.collect_granular(\"dir\") == [(stage3, None)]\n    assert not dvc.stage.collect_granular(\"dir\", recursive=True)\n    assert dvc.stage.collect_granular(\"./dir\") == [\n        (stage1, os.path.join(tmp_dir / \"dir\"))\n    ]\n\n\ndef test_collect_granular_not_existing_stage_name(tmp_dir, dvc, run_copy):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    (stage,) = tmp_dir.dvc_gen(\"copy-foo-bar\", \"copy-foo-bar\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    assert dvc.stage.collect_granular(\"copy-foo-bar.dvc:stage_name_not_needed\") == [\n        (stage, None)\n    ]\n    with pytest.raises(StageNotFound):\n        dvc.stage.collect_granular(\"dvc.yaml:does-not-exist\")\n\n\ndef test_get_stages(tmp_dir, dvc, run_copy):\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.load_all()\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage1 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    stage2 = run_copy(\"bar\", \"foobar\", name=\"copy-bar-foobar\")\n\n    assert set(dvc.stage.load_all()) == {stage1, stage2}\n    assert set(dvc.stage.load_all(path=PROJECT_FILE)) == {stage1, stage2}\n    assert set(dvc.stage.load_all(name=\"copy-bar-foobar\")) == {stage2}\n    assert set(dvc.stage.load_all(path=PROJECT_FILE, name=\"copy-bar-foobar\")) == {\n        stage2\n    }\n\n    
with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.load_all(path=relpath(tmp_dir / \"..\" / PROJECT_FILE))\n\n    with pytest.raises(StageNotFound):\n        dvc.stage.load_all(path=PROJECT_FILE, name=\"copy\")\n\n\ndef test_get_stages_old_dvcfile(tmp_dir, dvc):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert set(dvc.stage.load_all(\"foo.dvc\")) == {stage1}\n    assert set(dvc.stage.load_all(\"foo.dvc\", name=\"foo-generate\")) == {stage1}\n\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.load_all(path=relpath(tmp_dir / \"..\" / \"foo.dvc\"))\n\n\ndef test_get_stage(tmp_dir, dvc, run_copy):\n    tmp_dir.gen(\"foo\", \"foo\")\n    stage1 = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    with pytest.raises(StageNameUnspecified):\n        dvc.stage.load_one()\n\n    with pytest.raises(StageNameUnspecified):\n        dvc.stage.load_one(path=PROJECT_FILE)\n\n    assert dvc.stage.load_one(path=PROJECT_FILE, name=\"copy-foo-bar\") == stage1\n    assert dvc.stage.load_one(name=\"copy-foo-bar\") == stage1\n\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.load_one(path=\"something.yaml\", name=\"name\")\n\n    with pytest.raises(StageNotFound):\n        dvc.stage.load_one(name=\"random_name\")\n\n\ndef test_get_stage_single_stage_dvcfile(tmp_dir, dvc):\n    (stage1,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    assert dvc.stage.load_one(\"foo.dvc\") == stage1\n    assert dvc.stage.load_one(\"foo.dvc\", name=\"jpt\") == stage1\n    with pytest.raises(StageFileDoesNotExistError):\n        dvc.stage.load_one(path=\"bar.dvc\", name=\"name\")\n\n\ndef test_collect_optimization(tmp_dir, dvc, mocker):\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo text\")\n\n    # Forget cached stages and graph and error out on collection\n    dvc._reset()\n    mocker.patch(\n        \"dvc.repo.Repo.index\", property(raiser(Exception(\"Should not collect\")))\n    )\n\n    # Should read stage directly instead of 
collecting the whole graph\n    dvc.stage.collect(stage.path)\n    dvc.stage.collect_granular(stage.path)\n\n\ndef test_collect_optimization_on_stage_name(tmp_dir, dvc, mocker, run_copy):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    # Forget cached stages and graph and error out on collection\n    dvc._reset()\n    mocker.patch(\n        \"dvc.repo.Repo.index\", property(raiser(Exception(\"Should not collect\")))\n    )\n\n    # Should read stage directly instead of collecting the whole graph\n    assert dvc.stage.collect(\"copy-foo-bar\") == [stage]\n    assert dvc.stage.collect_granular(\"copy-foo-bar\") == [(stage, None)]\n\n\ndef test_collect_repo_callback(tmp_dir, dvc, mocker):\n    mock = mocker.Mock()\n    dvc.stage_collection_error_handler = mock\n\n    (stage,) = tmp_dir.dvc_gen(\"foo\", \"foo\")\n    (tmp_dir / PROJECT_FILE).dump({\"stages\": {\"cmd\": \"echo hello world\"}})\n\n    dvc._reset()\n    assert dvc.index.stages == [stage]\n    mock.assert_called_once()\n\n    file_path, exc = mock.call_args[0]\n    assert file_path == PROJECT_FILE\n    assert isinstance(exc, YAMLValidationError)\n\n\ndef test_gitignored_file_try_collect_granular_for_data_files(tmp_dir, dvc, scm):\n    (stage,) = tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    path = os.path.join(\"data\", \"foo\")\n\n    assert dvc.stage.collect_granular(path) == [(stage, os.path.join(tmp_dir, path))]\n\n    scm.ignore(stage.path)\n    dvc._reset()\n\n    with pytest.raises(NoOutputOrStageError):\n        dvc.stage.collect_granular(path)\n\n\ndef test_gitignored_file_try_collect_granular_for_dvc_yaml_files(\n    tmp_dir, dvc, scm, stages\n):\n    assert dvc.stage.collect_granular(\"bar\") == [\n        (stages[\"copy-foo-bar\"], os.path.join(tmp_dir, \"bar\"))\n    ]\n\n    scm.ignore(tmp_dir / \"dvc.yaml\")\n    scm._reset()\n\n    with pytest.raises(FileIsGitIgnored):\n        
dvc.stage.collect_granular(\"bar\")\n"
  },
  {
    "path": "tests/func/test_state.py",
    "content": "import os\n\nfrom dvc_data.hashfile.hash import file_md5\nfrom dvc_data.hashfile.hash_info import HashInfo\nfrom dvc_data.hashfile.state import State\n\n\ndef test_state(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo content\")\n    path = tmp_dir / \"foo\"\n    hash_info = HashInfo(\"md5\", file_md5(path, dvc.fs))\n\n    state = State(dvc.root_dir, dvc.tmp_dir, dvc.dvcignore)\n\n    state.save(str(path), dvc.fs, hash_info)\n    assert state.get(str(path), dvc.fs)[1] == hash_info\n\n    path.unlink()\n    path.write_text(\"1\")\n\n    assert state.get(str(path), dvc.fs) == (None, None)\n\n    hash_info = HashInfo(\"md5\", file_md5(path, dvc.fs))\n    state.save(str(path), dvc.fs, hash_info)\n\n    assert state.get(str(path), dvc.fs)[1] == hash_info\n\n\ndef test_state_overflow(tmp_dir, dvc):\n    # NOTE: trying to add more entries than state can handle,\n    # to see if it will clean up and vacuum successfully\n    dvc.config[\"state\"][\"row_limit\"] = 10\n\n    path = tmp_dir / \"dir\"\n    path.mkdir()\n    for i in range(20):\n        (path / str(i)).write_text(str(i))\n\n    dvc.add(\"dir\")\n\n\ndef mock_get_inode(inode):\n    def get_inode_mocked(_):\n        return inode\n\n    return get_inode_mocked\n\n\ndef test_remove_links(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo_content\", \"bar\": \"bar_content\"})\n\n    assert len(dvc.state.links) == 2\n\n    dvc.state.remove_links([\"foo\", \"bar\"], dvc.fs)\n\n    assert len(dvc.state.links) == 0\n\n\ndef test_get_unused_links(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo_content\", \"bar\": \"bar_content\"})\n\n    links = [os.path.join(dvc.root_dir, link) for link in [\"foo\", \"bar\"]]\n    assert set(dvc.state.get_unused_links([], dvc.fs)) == {\"foo\", \"bar\"}\n    assert set(dvc.state.get_unused_links(links[:1], dvc.fs)) == {\"bar\"}\n    assert set(dvc.state.get_unused_links(links, dvc.fs)) == set()\n    assert set(\n        dvc.state.get_unused_links(\n            
([*links[:1], os.path.join(dvc.root_dir, \"not-existing-file\")]),\n            dvc.fs,\n        )\n    ) == {\"bar\"}\n"
  },
  {
    "path": "tests/func/test_status.py",
    "content": "import os\n\nimport pytest\nfrom dulwich.porcelain import remove as git_rm\n\nfrom dvc.cli import main\nfrom dvc.fs import localfs\n\n\ndef test_quiet(tmp_dir, dvc, capsys):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    # clear\n    capsys.readouterr()\n\n    assert main([\"status\", \"--quiet\"]) == 0\n    out_err = capsys.readouterr()\n    assert not out_err.out\n    assert not out_err.err\n\n    tmp_dir.gen(\"foo\", \"barr\")\n\n    assert main([\"status\", \"--quiet\"]) == 1\n    out_err = capsys.readouterr()\n    assert not out_err.out\n    assert not out_err.err\n\n\ndef test_implied_cloud(dvc, mocker):\n    mock_status = mocker.patch(\"dvc.repo.status._cloud_status\", return_value=True)\n\n    main([\"status\", \"--remote\", \"something\"])\n    assert mock_status.called\n\n\n@pytest.mark.parametrize(\"check_updates\", [True, False])\ndef test_status_non_dvc_repo_import(tmp_dir, dvc, git_dir, check_updates):\n    with git_dir.branch(\"branch\", new=True):\n        git_dir.scm_gen(\"file\", \"first version\", commit=\"first version\")\n\n    dvc.imp(os.fspath(git_dir), \"file\", \"file\", rev=\"branch\")\n\n    assert dvc.status([\"file.dvc\"], check_updates=check_updates) == {}\n\n    with git_dir.branch(\"branch\", new=False):\n        git_dir.scm_gen(\"file\", \"second version\", commit=\"update file\")\n\n    status = dvc.status([\"file.dvc\"], check_updates=check_updates)\n    if check_updates:\n        assert status == {\n            \"file.dvc\": [{\"changed deps\": {f\"file ({git_dir})\": \"update available\"}}]\n        }\n    else:\n        assert status == {}\n\n\ndef test_status_before_and_after_dvc_init(tmp_dir, dvc, git_dir):\n    git_dir.scm_gen(\"file\", \"first version\", commit=\"first version\")\n    old_rev = git_dir.scm.get_rev()\n\n    dvc.imp(os.fspath(git_dir), \"file\", \"file\")\n\n    assert dvc.status([\"file.dvc\"]) == {}\n\n    with git_dir.chdir():\n        git_dir.init(dvc=True)\n        git_rm(git_dir, 
[\"file\"])\n        git_dir.dvc_gen(\"file\", \"second version\", commit=\"with dvc\")\n        new_rev = git_dir.scm.get_rev()\n\n    assert old_rev != new_rev\n\n    (status,) = dvc.status([\"file.dvc\"])[\"file.dvc\"]\n    assert status == {\n        \"changed deps\": {f\"file ({os.fspath(git_dir)})\": \"update available\"}\n    }\n\n\ndef test_status_on_pipeline_stages(tmp_dir, dvc, run_copy):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    stage = run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    stage.cmd = \"  \".join(stage.cmd.split())\n    stage.dvcfile._dump_pipeline_file(stage)\n    assert dvc.status(\"copy-foo-bar\") == {\"copy-foo-bar\": [\"changed command\"]}\n\n    # delete outputs\n    (tmp_dir / \"bar\").unlink()\n    assert dvc.status() == {\n        \"copy-foo-bar\": [\n            {\"changed outs\": {\"bar\": \"deleted\"}},\n            \"changed command\",\n        ]\n    }\n    (tmp_dir / \"foo\").unlink()\n    assert dvc.status() == {\n        \"foo.dvc\": [{\"changed outs\": {\"foo\": \"deleted\"}}],\n        \"copy-foo-bar\": [\n            {\"changed deps\": {\"foo\": \"deleted\"}},\n            {\"changed outs\": {\"bar\": \"deleted\"}},\n            \"changed command\",\n        ],\n    }\n\n\ndef test_status_recursive(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"file\": \"text1\", \"subdir\": {\"file2\": \"text2\"}}})\n    stages = dvc.add(localfs.find(\"dir\"), no_commit=True)\n\n    assert len(stages) == 2\n\n    assert dvc.status(targets=[\"dir\"], recursive=True) == {\n        os.path.join(\"dir\", \"file.dvc\"): [\n            {\"changed outs\": {os.path.join(\"dir\", \"file\"): \"not in cache\"}}\n        ],\n        os.path.join(\"dir\", \"subdir\", \"file2.dvc\"): [\n            {\"changed outs\": {os.path.join(\"dir\", \"subdir\", \"file2\"): \"not in cache\"}}\n        ],\n    }\n\n\ndef test_status_outputs(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\", \"bar\": \"bar\"})\n    dvc.run(\n        outs=[\"alice\", 
\"bob\"],\n        deps=[\"foo\", \"bar\"],\n        cmd=\"echo alice>alice && echo bob>bob\",\n        name=\"alice_bob\",\n    )\n    tmp_dir.gen({\"alice\": \"new alice\", \"bob\": \"new bob\"})\n\n    assert dvc.status(targets=[\"alice_bob\"]) == {\n        \"alice_bob\": [{\"changed outs\": {\"alice\": \"modified\", \"bob\": \"modified\"}}]\n    }\n\n    assert dvc.status(targets=[\"alice\"]) == {\n        \"alice_bob\": [{\"changed outs\": {\"alice\": \"modified\"}}]\n    }\n\n\ndef test_params_without_targets(tmp_dir, dvc):\n    dvc.stage.add(name=\"test\", cmd=\"echo params.yaml\", params=[{\"params.yaml\": None}])\n    assert dvc.status() == {\"test\": [{\"changed deps\": {\"params.yaml\": \"deleted\"}}]}\n\n    (tmp_dir / \"params.yaml\").touch()\n    assert dvc.status() == {\"test\": [{\"changed deps\": {\"params.yaml\": \"new\"}}]}\n\n    dvc.commit(\"test\", force=True)\n    # make sure that we are able to keep track of \"empty\" contents\n    # and be able to distinguish between no-lock-entry and empty-lock-entry.\n    assert (tmp_dir / \"dvc.lock\").parse() == {\n        \"schema\": \"2.0\",\n        \"stages\": {\"test\": {\"cmd\": \"echo params.yaml\", \"params\": {\"params.yaml\": {}}}},\n    }\n    assert dvc.status() == {}\n\n    (tmp_dir / \"params.yaml\").dump({\"foo\": \"foo\", \"bar\": \"bar\"})\n    assert dvc.status() == {\n        \"test\": [{\"changed deps\": {\"params.yaml\": {\"bar\": \"new\", \"foo\": \"new\"}}}]\n    }\n    dvc.commit(\"test\", force=True)\n\n    (tmp_dir / \"params.yaml\").dump({\"foo\": \"foobar\", \"lorem\": \"ipsum\"})\n    assert dvc.status() == {\n        \"test\": [\n            {\n                \"changed deps\": {\n                    \"params.yaml\": {\n                        \"bar\": \"deleted\",\n                        \"foo\": \"modified\",\n                        \"lorem\": \"new\",\n                    }\n                }\n            }\n        ]\n    }\n"
  },
  {
    "path": "tests/func/test_unprotect.py",
    "content": "import os\n\n\ndef test_unprotect(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n\n    dvc.cache.local.cache_types = [\"hardlink\"]\n    dvc.add(\"foo\")\n    cache = os.path.join(\n        \".dvc\", \"cache\", \"files\", \"md5\", \"ac\", \"bd18db4cc2f85cedef654fccc4a4d8\"\n    )\n    assert not os.access(\"foo\", os.W_OK)\n    assert not os.access(cache, os.W_OK)\n\n    dvc.unprotect(\"foo\")\n    assert os.access(\"foo\", os.W_OK)\n\n    if os.name == \"nt\":\n        # NOTE: cache is now unprotected, because NTFS doesn't allow\n        # deleting read-only files, so we have to try to set write perms\n        # on files that we try to delete, which propagates to the cache\n        # file. But it should be restored after the next cache check, hence\n        # why we call `dvc status` here.\n        assert os.access(cache, os.W_OK)\n        dvc.status()\n\n    assert not os.access(cache, os.W_OK)\n"
  },
  {
    "path": "tests/func/test_update.py",
    "content": "import os\n\nimport pytest\nfrom dulwich.porcelain import remove as git_rm\n\nfrom dvc.dependency import base\nfrom dvc.dvcfile import load_file\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.testing.tmp_dir import make_subrepo\n\n\n@pytest.mark.parametrize(\"cached\", [True, False])\ndef test_update_import(tmp_dir, dvc, erepo_dir, cached):\n    gen = erepo_dir.dvc_gen if cached else erepo_dir.scm_gen\n\n    with erepo_dir.branch(\"branch\", new=True), erepo_dir.chdir():\n        gen(\n            {\n                \"version\": \"branch\",\n                \"dir\": {\"version\": \"branch\", \"subdir\": {\"file\": \"file\"}},\n            },\n            commit=\"add version file\",\n        )\n        old_rev = erepo_dir.scm.get_rev()\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"version\", \"version\", rev=\"branch\")\n    dir_stage = dvc.imp(os.fspath(erepo_dir), \"dir\", \"dir\", rev=\"branch\")\n    assert dvc.status() == {}\n\n    assert (tmp_dir / \"version\").read_text() == \"branch\"\n    assert (tmp_dir / \"dir\").read_text() == {\n        \"version\": \"branch\",\n        \"subdir\": {\"file\": \"file\"},\n    }\n    assert stage.deps[0].def_repo[\"rev_lock\"] == old_rev\n    assert dir_stage.deps[0].def_repo[\"rev_lock\"] == old_rev\n\n    # Update version file\n    with erepo_dir.branch(\"branch\", new=False), erepo_dir.chdir():\n        gen(\n            {\n                \"version\": \"updated\",\n                \"dir\": {\"version\": \"updated\", \"subdir\": {\"file\": \"file\"}},\n            },\n            commit=\"update version content\",\n        )\n        new_rev = erepo_dir.scm.get_rev()\n\n    assert old_rev != new_rev\n\n    assert dvc.status() == {\n        \"dir.dvc\": [\n            {\"changed deps\": {f\"dir ({os.fspath(erepo_dir)})\": \"update available\"}}\n        ],\n        \"version.dvc\": [\n            {\"changed deps\": {f\"version ({os.fspath(erepo_dir)})\": \"update available\"}}\n        ],\n 
   }\n\n    (stage,) = dvc.update(stage.path)\n    (dir_stage,) = dvc.update(dir_stage.path)\n    assert dvc.status() == {}\n\n    assert (tmp_dir / \"version\").read_text() == \"updated\"\n    assert (tmp_dir / \"dir\").read_text() == {\n        \"version\": \"updated\",\n        \"subdir\": {\"file\": \"file\"},\n    }\n\n    assert stage.deps[0].def_repo[\"rev_lock\"] == new_rev\n    assert dir_stage.deps[0].def_repo[\"rev_lock\"] == new_rev\n\n\ndef test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir):\n    old_rev = None\n    with erepo_dir.branch(\"branch\", new=True), erepo_dir.chdir():\n        erepo_dir.scm_gen(\"version\", \"branch\", commit=\"add version file\")\n        old_rev = erepo_dir.scm.get_rev()\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"version\", \"version\", rev=\"branch\")\n\n    imported = tmp_dir / \"version\"\n    assert imported.is_file()\n    assert imported.read_text() == \"branch\"\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev\": \"branch\",\n        \"rev_lock\": old_rev,\n    }\n\n    new_rev = None\n    with erepo_dir.branch(\"branch\", new=False), erepo_dir.chdir():\n        git_rm(erepo_dir, [\"version\"], cached=True)\n        erepo_dir.dvc_gen(\"version\", \"updated\", commit=\"upgrade to DVC tracking\")\n        new_rev = erepo_dir.scm.get_rev()\n\n    assert old_rev != new_rev\n\n    (status,) = dvc.status([stage.path])[\"version.dvc\"]\n    (changed_dep,) = list(status[\"changed deps\"].items())\n    assert changed_dep[0].startswith(\"version \")\n    assert changed_dep[1] == \"update available\"\n\n    dvc.update([stage.path])\n\n    assert dvc.status([stage.path]) == {}\n\n    assert imported.is_file()\n    assert imported.read_text() == \"updated\"\n\n    stage = load_file(dvc, stage.path).stage\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(erepo_dir),\n        \"rev\": \"branch\",\n        \"rev_lock\": new_rev,\n    }\n\n\ndef 
test_update_before_and_after_dvc_init(tmp_dir, dvc, git_dir):\n    with git_dir.chdir():\n        git_dir.scm_gen(\"file\", \"first version\", commit=\"first version\")\n        old_rev = git_dir.scm.get_rev()\n\n    stage = dvc.imp(os.fspath(git_dir), \"file\", \"file\")\n\n    with git_dir.chdir():\n        git_dir.init(dvc=True)\n        git_rm(git_dir, [\"file\"])\n        git_dir.dvc_gen(\"file\", \"second version\", commit=\"with dvc\")\n        new_rev = git_dir.scm.get_rev()\n\n    assert old_rev != new_rev\n\n    assert dvc.status([stage.path]) == {\n        \"file.dvc\": [\n            {\"changed deps\": {f\"file ({os.fspath(git_dir)})\": \"update available\"}}\n        ]\n    }\n\n    dvc.update([stage.path])\n\n    assert (tmp_dir / \"file\").read_text() == \"second version\"\n    assert dvc.status([stage.path]) == {}\n\n\ndef test_update_unchanged(tmp_dir, dvc, erepo_dir, mocker):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"file content\", commit=\"add file\")\n\n    assert (erepo_dir / \"file\").exists()\n    stage = dvc.imp(os.fspath(erepo_dir), \"file\")\n\n    spy = mocker.spy(base, \"fs_download\")\n    dvc.update([stage.path])\n\n    assert not spy.called\n\n\n@pytest.mark.parametrize(\"outs_exist\", [False, True])\ndef test_update_no_download(tmp_dir, dvc, erepo_dir, outs_exist, mocker):\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"file content\", commit=\"add file\")\n        initial_rev = erepo_dir.scm.get_rev()\n\n    stage = dvc.imp(os.fspath(erepo_dir), \"file\", no_download=not outs_exist)\n\n    assert stage.deps[0].def_repo[\"rev_lock\"] == initial_rev\n\n    dst = tmp_dir / \"file\"\n    assert dst.exists() is outs_exist\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen(\"file\", \"updated file content\", commit=\"update file\")\n        new_rev = erepo_dir.scm.get_rev()\n\n    updated_stage = dvc.update([stage.path], rev=new_rev, no_download=True)[0]\n    assert not 
dst.exists()\n\n    assert updated_stage.deps[0].def_repo[\"rev_lock\"] == new_rev\n\n    # output must have no information since no_download=True\n    out = updated_stage.outs[0]\n    assert out.hash_info.value is None\n    assert out.meta.size is None\n\n\ndef test_update_import_url(tmp_dir, dvc, workspace):\n    workspace.gen(\"file\", \"file content\")\n\n    dst = tmp_dir / \"imported_file\"\n    stage = dvc.imp_url(\"remote://workspace/file\", os.fspath(dst))\n\n    assert dst.is_file()\n    assert dst.read_text() == \"file content\"\n\n    # update data\n    workspace.gen(\"file\", \"updated file content\")\n\n    assert dvc.status([stage.path]) == {}\n    dvc.update([stage.path])\n    assert dvc.status([stage.path]) == {}\n\n    assert dst.is_file()\n    assert dst.read_text() == \"updated file content\"\n\n\n@pytest.mark.parametrize(\"outs_exist\", [False, True])\ndef test_update_import_url_no_download(tmp_dir, dvc, workspace, outs_exist, mocker):\n    workspace.gen(\"file\", \"file content\")\n\n    dst = tmp_dir / \"imported_file\"\n    stage = dvc.imp_url(\n        \"remote://workspace/file\", os.fspath(dst), no_download=not outs_exist\n    )\n\n    assert dst.exists() is outs_exist\n    hash_info = stage.deps[0].hash_info\n    assert hash_info.value == \"d10b4c3ff123b26dc068d43a8bef2d23\"\n\n    workspace.gen(\"file\", \"updated file content\")\n\n    updated_stage = dvc.update([stage.path], no_download=True)[0]\n    assert not dst.exists()\n\n    updated_hash_info = updated_stage.deps[0].hash_info\n    assert updated_hash_info != hash_info\n    assert updated_hash_info.value == \"6ffba511ce3aa40b8231d1b1f8c5fba5\"\n\n    # output must have no information since no_download=True\n    out = updated_stage.outs[0]\n    assert out.hash_info.value is None\n    assert out.hash_info.name is None\n    assert out.meta.size is None\n\n\ndef test_update_import_url_unchanged(tmp_dir, dvc, workspace, mocker):\n    workspace.gen(\"file\", \"file content\")\n\n    dst 
= tmp_dir / \"imported_file\"\n    stage = dvc.imp_url(\"remote://workspace/file\", os.fspath(dst))\n\n    spy = mocker.spy(base, \"fs_download\")\n\n    dvc.update([stage.path])\n    assert not spy.called\n\n\ndef test_update_rev(tmp_dir, dvc, scm, git_dir):\n    with git_dir.chdir():\n        git_dir.scm_gen({\"foo\": \"foo\"}, commit=\"first\")\n\n    dvc.imp(os.fspath(git_dir), \"foo\")\n    assert (tmp_dir / \"foo.dvc\").exists()\n\n    with git_dir.chdir(), git_dir.branch(\"branch1\", new=True):\n        git_dir.scm_gen({\"foo\": \"foobar\"}, commit=\"branch1 commit\")\n        branch1_head = git_dir.scm.get_rev()\n\n    with git_dir.chdir(), git_dir.branch(\"branch2\", new=True):\n        git_dir.scm_gen({\"foo\": \"foobar foo\"}, commit=\"branch2 commit\")\n        branch2_head = git_dir.scm.get_rev()\n\n    stage = dvc.update([\"foo.dvc\"], rev=\"branch1\")[0]\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(git_dir),\n        \"rev\": \"branch1\",\n        \"rev_lock\": branch1_head,\n    }\n    with open(tmp_dir / \"foo\", encoding=\"utf-8\") as f:\n        assert f.read() == \"foobar\"\n\n    stage = dvc.update([\"foo.dvc\"], rev=\"branch2\")[0]\n    assert stage.deps[0].def_repo == {\n        \"url\": os.fspath(git_dir),\n        \"rev\": \"branch2\",\n        \"rev_lock\": branch2_head,\n    }\n    with open(tmp_dir / \"foo\", encoding=\"utf-8\") as f:\n        assert f.read() == \"foobar foo\"\n\n\ndef test_update_recursive(tmp_dir, dvc, erepo_dir):\n    with erepo_dir.branch(\"branch\", new=True), erepo_dir.chdir():\n        erepo_dir.scm_gen(\n            {\"foo1\": \"text1\", \"foo2\": \"text2\", \"foo3\": \"text3\"},\n            commit=\"add foo files\",\n        )\n        old_rev = erepo_dir.scm.get_rev()\n\n    tmp_dir.gen({\"dir\": {\"subdir\": {}}})\n    stage1 = dvc.imp(\n        os.fspath(erepo_dir), \"foo1\", os.path.join(\"dir\", \"foo1\"), rev=\"branch\"\n    )\n    stage2 = dvc.imp(\n        os.fspath(erepo_dir),\n  
      \"foo2\",\n        os.path.join(\"dir\", \"subdir\", \"foo2\"),\n        rev=\"branch\",\n    )\n    stage3 = dvc.imp(\n        os.fspath(erepo_dir),\n        \"foo3\",\n        os.path.join(\"dir\", \"subdir\", \"foo3\"),\n        rev=\"branch\",\n    )\n\n    assert (tmp_dir / os.path.join(\"dir\", \"foo1\")).read_text() == \"text1\"\n    assert (tmp_dir / os.path.join(\"dir\", \"subdir\", \"foo2\")).read_text() == \"text2\"\n    assert (tmp_dir / os.path.join(\"dir\", \"subdir\", \"foo3\")).read_text() == \"text3\"\n\n    assert stage1.deps[0].def_repo[\"rev_lock\"] == old_rev\n    assert stage2.deps[0].def_repo[\"rev_lock\"] == old_rev\n    assert stage3.deps[0].def_repo[\"rev_lock\"] == old_rev\n\n    with erepo_dir.branch(\"branch\", new=False), erepo_dir.chdir():\n        erepo_dir.scm_gen(\n            {\"foo1\": \"updated1\", \"foo2\": \"updated2\", \"foo3\": \"updated3\"},\n            \"\",\n            \"update foo content\",\n        )\n        new_rev = erepo_dir.scm.get_rev()\n\n    assert old_rev != new_rev\n\n    dvc.update([\"dir\"], recursive=True)\n\n    stage1 = load_file(dvc, stage1.path).stage\n    stage2 = load_file(dvc, stage2.path).stage\n    stage3 = load_file(dvc, stage3.path).stage\n    assert stage1.deps[0].def_repo[\"rev_lock\"] == new_rev\n    assert stage2.deps[0].def_repo[\"rev_lock\"] == new_rev\n    assert stage3.deps[0].def_repo[\"rev_lock\"] == new_rev\n\n\n@pytest.mark.parametrize(\"is_dvc\", [True, False])\ndef test_update_from_subrepos(tmp_dir, dvc, erepo_dir, is_dvc):\n    subrepo = erepo_dir / \"subrepo\"\n    make_subrepo(subrepo, erepo_dir.scm)\n    gen = subrepo.dvc_gen if is_dvc else subrepo.scm_gen\n    with subrepo.chdir():\n        gen(\"foo\", \"foo\", commit=\"subrepo initial\")\n\n    path = os.path.join(\"subrepo\", \"foo\")\n    repo_path = os.fspath(erepo_dir)\n    dvc.imp(repo_path, path, out=\"out\")\n    assert dvc.status() == {}\n\n    with subrepo.chdir():\n        gen(\"foo\", \"foobar\", 
commit=\"subrepo second commit\")\n\n    assert dvc.status()[\"out.dvc\"][0][\"changed deps\"] == {\n        f\"{path} ({repo_path})\": \"update available\"\n    }\n    (stage,) = dvc.update([\"out.dvc\"])\n\n    assert (tmp_dir / \"out\").read_text() == \"foobar\"\n    assert stage.deps[0].def_path == os.path.join(\"subrepo\", \"foo\")\n    assert stage.deps[0].def_repo == {\n        \"url\": repo_path,\n        \"rev_lock\": erepo_dir.scm.get_rev(),\n    }\n\n\ndef test_update_import_to_remote(tmp_dir, dvc, erepo_dir, local_remote):\n    erepo_dir.scm_gen({\"foo\": \"foo\"}, commit=\"add foo\")\n    stage = dvc.imp(os.fspath(erepo_dir), \"foo\")\n    erepo_dir.scm_gen({\"foo\": \"bar\"}, commit=\"update foo\")\n    with pytest.raises(InvalidArgumentError):\n        dvc.update(stage.path, to_remote=True)\n\n\ndef test_update_import_url_to_remote(tmp_dir, dvc, workspace, local_remote):\n    workspace.gen(\"foo\", \"foo\")\n    stage = dvc.imp_url(\"remote://workspace/foo\", to_remote=True)\n\n    workspace.gen(\"foo\", \"bar\")\n    (updated,) = dvc.update(stage.path, to_remote=True)\n\n    assert stage.deps[0].hash_info != updated.deps[0].hash_info\n    assert stage.outs[0].hash_info != updated.outs[0].hash_info\n\n    dvc.pull(\"foo\")\n    assert (tmp_dir / \"foo\").read_text() == \"bar\"\n\n\ndef test_update_import_url_to_remote_directory(\n    mocker, tmp_dir, dvc, workspace, local_remote\n):\n    workspace.gen({\"data\": {\"foo\": \"foo\", \"bar\": {\"baz\": \"baz\"}}})\n    stage = dvc.imp_url(\"remote://workspace/data\", to_remote=True)\n\n    workspace.gen(\n        {\n            \"data\": {\n                \"foo2\": \"foo2\",\n                \"bar\": {\"baz2\": \"baz2\"},\n                \"repeated_hashes\": {\n                    \"foo\": \"foo\",\n                    \"baz\": \"baz\",\n                    \"foo_with_different_name\": \"foo\",\n                },\n            }\n        }\n    )\n\n    (updated,) = dvc.update(stage.path, 
to_remote=True)\n\n    assert stage.deps[0].hash_info != updated.deps[0].hash_info\n    assert stage.outs[0].hash_info != updated.outs[0].hash_info\n\n    dvc.pull(\"data\")\n    assert (tmp_dir / \"data\").read_text() == {\n        \"foo\": \"foo\",\n        \"foo2\": \"foo2\",\n        \"bar\": {\"baz\": \"baz\", \"baz2\": \"baz2\"},\n        \"repeated_hashes\": {\n            \"foo\": \"foo\",\n            \"baz\": \"baz\",\n            \"foo_with_different_name\": \"foo\",\n        },\n    }\n\n\ndef test_update_import_url_to_remote_directory_changed_contents(\n    tmp_dir, dvc, local_workspace, local_remote\n):\n    local_workspace.gen({\"data\": {\"foo\": \"foo\", \"bar\": {\"baz\": \"baz\"}}})\n    stage = dvc.imp_url(\"remote://workspace/data\", to_remote=True)\n\n    local_workspace.gen(\n        {\"data\": {\"foo\": \"not_foo\", \"foo2\": \"foo\", \"bar\": {\"baz2\": \"baz2\"}}}\n    )\n    (updated,) = dvc.update(stage.path, to_remote=True)\n\n    assert stage.deps[0].hash_info != updated.deps[0].hash_info\n    assert stage.outs[0].hash_info != updated.outs[0].hash_info\n\n    dvc.pull(\"data\")\n    assert (tmp_dir / \"data\").read_text() == {\n        \"foo\": \"not_foo\",\n        \"foo2\": \"foo\",\n        \"bar\": {\"baz\": \"baz\", \"baz2\": \"baz2\"},\n    }\n\n\ndef test_update_import_url_to_remote_directory_same_hash(\n    tmp_dir, dvc, local_workspace, local_remote\n):\n    local_workspace.gen({\"data\": {\"foo\": \"foo\", \"bar\": {\"baz\": \"baz\"}, \"same\": \"same\"}})\n    stage = dvc.imp_url(\"remote://workspace/data\", to_remote=True)\n\n    local_workspace.gen({\"data\": {\"foo\": \"baz\", \"bar\": {\"baz\": \"foo\"}, \"same\": \"same\"}})\n    (updated,) = dvc.update(stage.path, to_remote=True)\n\n    assert stage.deps[0].hash_info != updated.deps[0].hash_info\n    assert stage.outs[0].hash_info != updated.outs[0].hash_info\n\n    dvc.pull(\"data\")\n    assert (tmp_dir / \"data\").read_text() == {\n        \"foo\": \"baz\",\n        
\"bar\": {\"baz\": \"foo\"},\n        \"same\": \"same\",\n    }\n"
  },
  {
    "path": "tests/func/test_used_objs.py",
    "content": "import json\nimport os\n\nimport pytest\n\n\n@pytest.mark.parametrize(\n    \"stage_wdir, cwd, target\",\n    [\n        (os.curdir, os.curdir, \"foo\"),\n        (os.curdir, os.curdir, \"train\"),\n        (os.curdir, os.curdir, \"dvc.yaml:train\"),\n        (os.curdir, \"sub\", os.path.join(os.pardir, \"foo\")),\n        (\n            os.curdir,\n            \"sub\",\n            os.path.join(os.pardir, \"dvc.yaml:train\"),\n        ),\n        (\"sub\", os.curdir, os.path.join(\"sub\", \"foo\")),\n        (\"sub\", os.curdir, os.path.join(\"sub\", \"dvc.yaml:train\")),\n        (\"sub\", \"sub\", \"foo\"),\n        (\"sub\", \"sub\", \"train\"),\n        (\"sub\", \"sub\", \"dvc.yaml:train\"),\n        (\"sub\", \"dir\", os.path.join(os.pardir, \"sub\", \"foo\")),\n        (\n            \"sub\",\n            \"dir\",\n            os.path.join(os.pardir, \"sub\", \"dvc.yaml:train\"),\n        ),\n    ],\n)\ndef test_from_gitfs_when_pwd_not_in_root(tmp_dir, scm, dvc, stage_wdir, cwd, target):\n    path = tmp_dir.joinpath(stage_wdir).resolve()\n    path.mkdir(parents=True, exist_ok=True)\n    wdir = tmp_dir.joinpath(cwd).resolve()\n    wdir.mkdir(parents=True, exist_ok=True)\n\n    (path / \"dvc.yaml\").write_text(\n        json.dumps({\"stages\": {\"train\": {\"cmd\": \"echo foo > foo\", \"outs\": [\"foo\"]}}})\n    )\n    path.gen({\"foo\": \"foo\"})\n    dvc.commit(None, force=True)\n    tmp_dir.scm_add(\n        [path / file for file in (\"dvc.yaml\", \"dvc.lock\", \".gitignore\")],\n        commit=\"add files\",\n    )\n\n    with wdir.chdir():\n        assert dvc.used_objs([target], revs=[scm.get_rev()])\n\n\ndef test_used_objs_push(tmp_dir, scm, dvc):\n    stage = tmp_dir.dvc_gen(\"foo\", \"foo\")[0]\n    hash_info = stage.outs[0].hash_info\n\n    stage.outs[0].can_push = True\n    assert stage.get_used_objs(push=False) == {None: {hash_info}}\n    assert stage.get_used_objs(push=True) == {None: {hash_info}}\n\n    stage.outs[0].can_push = 
False\n    assert stage.get_used_objs(push=False) == {None: {hash_info}}\n    assert stage.get_used_objs(push=True) == {}\n"
  },
  {
    "path": "tests/func/test_utils.py",
    "content": "import re\n\nimport pytest\n\nfrom dvc import utils\nfrom dvc.exceptions import DvcException\n\n\ndef test_dict_md5():\n    d = {\n        \"cmd\": \"python code.py foo file1\",\n        \"locked\": \"true\",\n        \"outs\": [\n            {\n                \"path\": \"file1\",\n                \"metric\": {\"type\": \"raw\"},\n                \"cache\": False,\n                \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            }\n        ],\n        \"deps\": [\n            {\"path\": \"foo\", \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\"},\n            {\"path\": \"code.py\", \"md5\": \"d05447644b89960913c7eee5fd776adb\"},\n        ],\n    }\n\n    md5 = \"8b263fa05ede6c3145c164829be694b4\"\n\n    assert md5 == utils.dict_md5(d, exclude=[\"metric\", \"locked\"])\n\n\ndef test_boxify():\n    expected = (\n        \"+-----------------+\\n\"\n        \"|                 |\\n\"\n        \"|     message     |\\n\"\n        \"|                 |\\n\"\n        \"+-----------------+\\n\"\n    )\n\n    assert expected == utils.boxify(\"message\")\n\n\ndef test_glob_no_match():\n    with pytest.raises(\n        DvcException, match=re.escape(\"Glob ['invalid*'] has no matches.\")\n    ):\n        utils.glob_targets([\"invalid*\"], glob=True)\n"
  },
  {
    "path": "tests/func/test_version.py",
    "content": "import re\n\nfrom dvc.cli import main\nfrom tests.unit.test_info import (\n    DVC_VERSION_REGEX,\n    PYTHON_VERSION_REGEX,\n    SUBPROJECTS,\n    find_supported_remotes,\n)\n\n\ndef test_(tmp_dir, dvc, scm, capsys):\n    assert main([\"version\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert re.search(rf\"DVC version: {DVC_VERSION_REGEX}\", out)\n    assert re.search(f\"Platform: {PYTHON_VERSION_REGEX} on .*\", out)\n    for subproject in SUBPROJECTS:\n        assert re.search(rf\"{subproject} = .*\", out)\n\n    assert find_supported_remotes(out)\n    assert re.search(r\"Cache types: .*\", out)\n    assert re.search(r\"Caches: local\", out)\n    assert re.search(r\"Remotes: None\", out)\n    assert \"Repo: dvc, git\" in out\n\n\ndef test_import_error(tmp_dir, dvc, scm, capsys, monkeypatch):\n    import importlib.metadata as importlib_metadata\n\n    original = importlib_metadata.version\n\n    def _import_error(name):\n        if name == \"dvclive\":\n            raise ImportError\n        return original(name)\n\n    monkeypatch.setattr(importlib_metadata, \"version\", _import_error)\n    assert main([\"version\"]) == 0\n\n    out, _ = capsys.readouterr()\n\n    for subproject in SUBPROJECTS:\n        match = re.search(rf\"{subproject} = {DVC_VERSION_REGEX}\", out)\n        if subproject != \"dvclive\":\n            assert match\n        else:\n            assert match is None\n"
  },
  {
    "path": "tests/func/test_virtual_directory.py",
    "content": "import os\nimport shutil\nfrom os.path import join\n\nfrom dvc_data.hashfile.hash_info import HashInfo\nfrom dvc_data.hashfile.meta import Meta\n\n\ndef test_virtual_add(tmp_dir, dvc, remote):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    (stage,) = dvc.add(\"dir\")\n    out = stage.outs[0]\n\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"5ea40360f5b4ec688df672a4db9c17d1.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=6, nfiles=2)\n\n    assert dvc.push() == 3\n    dvc.cache.local.clear()\n\n    tmp_dir.gen(\n        {\"dir\": {\"foobar\": \"foobar\", \"lorem\": \"ipsum\", \"subdir\": {\"file\": \"file\"}}}\n    )\n    (stage,) = dvc.add(\"dir/foobar\")\n\n    out = stage.outs[0]\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"a5beca056acbef9e0013347efdc2b751.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=12, nfiles=3)\n    assert dvc.push() == 2\n\n    (stage,) = dvc.add(\"dir/subdir\")\n    out = stage.outs[0]\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"de78e9fff7c3478c6b316bf08437d0f6.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=16, nfiles=4)\n    assert dvc.push() == 2\n\n\ndef test_virtual_remove(tmp_dir, dvc, remote):\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"foo\": \"foo\",\n                \"bar\": \"bar\",\n                \"subdir\": {\"lorem\": \"lorem\", \"ipsum\": \"ipsum\"},\n            }\n        }\n    )\n\n    (stage,) = dvc.add(\"dir\")\n    out = stage.outs[0]\n\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"15b0e3c73ad2c748ce206988cb6b7319.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=16, nfiles=4)\n\n    assert dvc.push() == 5\n    dvc.cache.local.clear()\n\n    (tmp_dir / \"dir\" / \"foo\").unlink()\n    (stage,) = dvc.add(\"dir/foo\")\n\n    out = stage.outs[0]\n    assert out.hash_info == HashInfo(\n        name=\"md5\", 
value=\"991ea7d558d320d8817a0798e9c676f1.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=None, nfiles=3)\n\n    assert dvc.push() == 1\n\n    shutil.rmtree(tmp_dir / \"dir\" / \"subdir\")\n    (stage,) = dvc.add(\"dir/subdir\")\n\n    out = stage.outs[0]\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"91aaa9bb58b657d623ef143b195a67e4.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=None, nfiles=1)\n    assert dvc.push() == 1\n\n\ndef test_virtual_update_dir(tmp_dir, dvc, remote):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"subdir\": {\"lorem\": \"lorem\"}}})\n    (stage,) = dvc.add(\"dir\")\n    out = stage.outs[0]\n\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"22a16c9bf84b3068bc2206d88a6b5776.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=8, nfiles=2)\n\n    assert dvc.push() == 3\n    dvc.cache.local.clear()\n    shutil.rmtree(\"dir\")\n\n    tmp_dir.gen({\"dir\": {\"subdir\": {\"ipsum\": \"lorem ipsum\", \"file\": \"file\"}}})\n    (stage,) = dvc.add(\"dir/subdir\")\n\n    out = stage.outs[0]\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"32f5734ea1a2aa1a067c0c15f0ae5781.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=None, nfiles=3)\n    assert dvc.push() == 3\n\n\ndef test_virtual_update_file(tmp_dir, dvc, remote):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"subdir\": {\"lorem\": \"lorem\"}}})\n    (stage,) = dvc.add(\"dir\")\n    out = stage.outs[0]\n\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"22a16c9bf84b3068bc2206d88a6b5776.dir\"\n    )\n    assert out.meta == Meta(isdir=True, size=8, nfiles=2)\n\n    assert dvc.push() == 3\n    dvc.cache.local.clear()\n    shutil.rmtree(\"dir\")\n\n    tmp_dir.gen({\"dir\": {\"foo\": \"foobar\"}})\n    (stage,) = dvc.add(\"dir/foo\")\n    out = stage.outs[0]\n    assert out.hash_info == HashInfo(\n        name=\"md5\", value=\"49408ac059c76086a3a892129a324b60.dir\"\n    )\n    
assert out.meta == Meta(isdir=True, size=None, nfiles=2)\n    assert dvc.push() == 2\n\n\ndef test_virtual_update_noop(tmp_dir, dvc, remote):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"subdir\": {\"lorem\": \"lorem\"}}})\n\n    (stage,) = dvc.add(\"dir\")\n    out = stage.outs[0]\n    hash_info = HashInfo(name=\"md5\", value=\"22a16c9bf84b3068bc2206d88a6b5776.dir\")\n    meta = Meta(isdir=True, size=8, nfiles=2)\n\n    assert out.hash_info == hash_info\n    assert out.meta == meta\n    assert dvc.push() == 3\n\n    dvc.cache.local.clear()\n    shutil.rmtree(\"dir\")\n\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"subdir\": {\"lorem\": \"lorem\"}}})\n\n    (stage,) = dvc.add(\"dir/foo\")\n    out = stage.outs[0]\n    assert out.hash_info == hash_info\n    assert out.meta == meta\n    assert not dvc.push()\n\n    dvc.cache.local.clear()\n\n    (stage,) = dvc.add(\"dir/subdir\")\n    out = stage.outs[0]\n    assert out.hash_info == hash_info\n    assert out.meta == meta\n    assert not dvc.push()\n\n\ndef test_partial_checkout_and_update(tmp_dir, dvc, remote):\n    dir1 = {f\"{i}.txt\": f\"dir1 {i}\" for i in range(10)}\n    dir2 = {f\"{i}.txt\": f\"dir2 {i}\" for i in range(10)}\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"subdir\": dir1, \"subdir2\": dir2}})\n\n    (stage,) = dvc.add(\"dir\")\n    out = stage.outs[0]\n\n    assert out.hash_info == HashInfo(\"md5\", \"9899dc38082ee0ec33de077df62a4a12.dir\")\n    assert out.meta == Meta(isdir=True, size=123, nfiles=21)\n\n    assert dvc.push() == 22\n    dvc.cache.local.clear()\n    shutil.rmtree(\"dir\")\n\n    assert dvc.pull(\"dir/subdir\") == {\n        \"added\": [\"dir\" + os.sep],\n        \"deleted\": [],\n        \"modified\": [],\n        \"stats\": {\"added\": 10, \"deleted\": 0, \"fetched\": 11, \"modified\": 0},\n    }\n    assert (tmp_dir / \"dir\").read_text() == {\"subdir\": dir1}\n\n    tmp_dir.gen({\"dir\": {\"subdir\": {\"file\": \"file\"}}})\n    (stage,) = dvc.add(join(\"dir\", 
\"subdir\", \"file\"))\n\n    out = stage.outs[0]\n    assert out.hash_info == HashInfo(\"md5\", \"e7531c3930f28a00edc25e6cef91db03.dir\")\n    assert out.meta == Meta(isdir=True, size=127, nfiles=22)\n    assert dvc.push() == 2\n"
  },
  {
    "path": "tests/func/utils/__init__.py",
    "content": ""
  },
  {
    "path": "tests/func/utils/test_hydra.py",
    "content": "import re\nfrom contextlib import nullcontext as does_not_raise\n\nimport pytest\n\nfrom dvc.exceptions import InvalidArgumentError\n\n\n@pytest.mark.parametrize(\"suffix\", [\"yaml\", \"toml\", \"json\"])\n@pytest.mark.parametrize(\n    \"overrides, expected\",\n    [\n        # Overriding\n        ([\"foo=baz\"], {\"foo\": \"baz\", \"goo\": {\"bag\": 3.0}, \"lorem\": False}),\n        ([\"foo=baz\", \"goo=bar\"], {\"foo\": \"baz\", \"goo\": \"bar\", \"lorem\": False}),\n        (\n            [\"foo.0=bar\"],\n            {\"foo\": [\"bar\", {\"baz\": 2}], \"goo\": {\"bag\": 3.0}, \"lorem\": False},\n        ),\n        (\n            [\"foo.1.baz=3\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 3}],\n                \"goo\": {\"bag\": 3.0},\n                \"lorem\": False,\n            },\n        ),\n        (\n            [\"goo.bag=4.0\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n                \"goo\": {\"bag\": 4.0},\n                \"lorem\": False,\n            },\n        ),\n        (\n            [\"++goo={bag: 1, b: 2}\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n                \"goo\": {\"bag\": 1, \"b\": 2},\n                \"lorem\": False,\n            },\n        ),\n        # 6129\n        (\n            [\"lorem=\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n                \"goo\": {\"bag\": 3.0},\n                \"lorem\": \"\",\n            },\n        ),\n        # 6129\n        (\n            [\"lorem=null\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n                \"goo\": {\"bag\": 3.0},\n                \"lorem\": None,\n            },\n        ),\n        # 5868\n        (\n            [\"lorem=1992-11-20\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n                \"goo\": {\"bag\": 3.0},\n                \"lorem\": \"1992-11-20\",\n     
       },\n        ),\n        # 5868\n        (\n            [\"lorem='1992-11-20'\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n                \"goo\": {\"bag\": 3.0},\n                \"lorem\": \"1992-11-20\",\n            },\n        ),\n        # Appending\n        (\n            [\"+a=1\"],\n            {\n                \"foo\": [{\"bar\": 1}, {\"baz\": 2}],\n                \"goo\": {\"bag\": 3.0},\n                \"lorem\": False,\n                \"a\": 1,\n            },\n        ),\n        # Removing\n        ([\"~foo\"], {\"goo\": {\"bag\": 3.0}, \"lorem\": False}),\n    ],\n)\ndef test_apply_overrides(tmp_dir, suffix, overrides, expected):\n    from dvc.utils.hydra import apply_overrides\n\n    if suffix == \"toml\" and overrides in [\n        [\"foo=baz\"],\n        [\"foo.0=bar\"],\n        [\"foo=baz\", \"goo=bar\"],\n        [\"lorem=null\"],\n    ]:\n        pytest.skip(\n            \"TOML dumper breaks when overriding a list/dict with other type or\"\n            \" when handling `null` values.\"\n        )\n\n    params_file = tmp_dir / f\"params.{suffix}\"\n    params_file.dump(\n        {\"foo\": [{\"bar\": 1}, {\"baz\": 2}], \"goo\": {\"bag\": 3.0}, \"lorem\": False}\n    )\n    apply_overrides(path=params_file.name, overrides=overrides)\n    assert params_file.parse() == expected\n\n\n@pytest.mark.parametrize(\n    \"overrides\",\n    [[\"foobar=2\"], [\"lorem=3,2\"], [\"+lorem=3\"], [\"foo[0]=bar\"]],\n)\ndef test_invalid_overrides(tmp_dir, overrides):\n    from dvc.utils.hydra import apply_overrides\n\n    params_file = tmp_dir / \"params.yaml\"\n    params_file.dump(\n        {\"foo\": [{\"bar\": 1}, {\"baz\": 2}], \"goo\": {\"bag\": 3.0}, \"lorem\": False}\n    )\n    with pytest.raises(InvalidArgumentError):\n        apply_overrides(path=params_file.name, overrides=overrides)\n\n\ndef hydra_setup(tmp_dir, config_dir, config_name):\n    config_dir = tmp_dir / config_dir\n    (config_dir / 
\"db\").mkdir(parents=True)\n    (config_dir / f\"{config_name}.yaml\").dump({\"defaults\": [{\"db\": \"mysql\"}]})\n    (config_dir / \"db\" / \"mysql.yaml\").dump(\n        {\"driver\": \"mysql\", \"user\": \"omry\", \"pass\": \"secret\"}\n    )\n    (config_dir / \"db\" / \"postgresql.yaml\").dump(\n        {\"driver\": \"postgresql\", \"user\": \"foo\", \"pass\": \"bar\", \"timeout\": 10}\n    )\n    return str(config_dir)\n\n\n@pytest.mark.parametrize(\"suffix\", [\"yaml\", \"toml\", \"json\"])\n@pytest.mark.parametrize(\n    \"overrides,expected\",\n    [\n        ([], {\"db\": {\"driver\": \"mysql\", \"user\": \"omry\", \"pass\": \"secret\"}}),\n        (\n            [\"db=postgresql\"],\n            {\n                \"db\": {\n                    \"driver\": \"postgresql\",\n                    \"user\": \"foo\",\n                    \"pass\": \"bar\",\n                    \"timeout\": 10,\n                }\n            },\n        ),\n        (\n            [\"db=postgresql\", \"db.timeout=20\"],\n            {\n                \"db\": {\n                    \"driver\": \"postgresql\",\n                    \"user\": \"foo\",\n                    \"pass\": \"bar\",\n                    \"timeout\": 20,\n                }\n            },\n        ),\n    ],\n)\ndef test_compose_and_dump_overrides(tmp_dir, suffix, overrides, expected):\n    from dvc.utils.hydra import compose_and_dump\n\n    config_name = \"config\"\n    output_file = tmp_dir / f\"params.{suffix}\"\n    config_dir = hydra_setup(tmp_dir, \"conf\", \"config\")\n    config_module = None\n    compose_and_dump(\n        output_file, config_dir, config_module, config_name, str(tmp_dir), overrides\n    )\n    assert output_file.parse() == expected\n\n\ndef hydra_setup_dir_basic(tmp_dir, config_subdir, config_name, config_content):\n    if config_subdir is None:\n        return None\n\n    config_dir = tmp_dir / config_subdir\n    config_dir.mkdir()\n    (config_dir / 
f\"{config_name}.yaml\").dump(config_content)\n    return str(config_dir)\n\n\n@pytest.mark.parametrize(\n    \"config_subdir,config_module,config_content,error_context\",\n    [\n        (\"conf\", None, {\"normal_yaml_config\": False}, does_not_raise()),\n        (\n            None,\n            \"hydra.test_utils.configs\",\n            {\"normal_yaml_config\": True},\n            does_not_raise(),\n        ),\n        (\n            \"conf\",\n            \"hydra.test_utils.configs\",\n            {\"normal_yaml_config\": False},\n            does_not_raise(),\n        ),\n        (\n            None,\n            None,\n            None,\n            pytest.raises(\n                ValueError,\n                match=re.escape(\n                    \"Either `config_dir` or `config_module` should be provided.\"\n                ),\n            ),\n        ),\n    ],\n)\ndef test_compose_and_dump_dir_module(\n    tmp_dir, config_subdir, config_module, config_content, error_context\n):\n    from dvc.utils.hydra import compose_and_dump\n\n    output_file = tmp_dir / \"params.yaml\"\n    config_name = \"config\"\n    config_dir = hydra_setup_dir_basic(\n        tmp_dir, config_subdir, config_name, config_content\n    )\n\n    with error_context:\n        compose_and_dump(\n            output_file, config_dir, config_module, config_name, str(tmp_dir), []\n        )\n        assert output_file.parse() == config_content\n\n\ndef test_compose_and_dump_yaml_handles_string(tmp_dir):\n    \"\"\"Regression test for https://github.com/treeverse/dvc/issues/8583\"\"\"\n    from dvc.utils.hydra import compose_and_dump\n\n    config = tmp_dir / \"conf\" / \"config.yaml\"\n    config.parent.mkdir()\n    config.write_text(\"foo: 'no'\\n\")\n    output_file = tmp_dir / \"params.yaml\"\n    compose_and_dump(output_file, str(config.parent), None, \"config\", str(tmp_dir), [])\n    assert output_file.read_text() == \"foo: 'no'\\n\"\n\n\ndef 
test_compose_and_dump_resolves_interpolation(tmp_dir):\n    \"\"\"Regression test for https://github.com/treeverse/dvc/issues/9196\"\"\"\n    from dvc.utils.hydra import compose_and_dump\n\n    config = tmp_dir / \"conf\" / \"config.yaml\"\n    config.parent.mkdir()\n    config.dump({\"data\": {\"root\": \"path/to/root\", \"raw\": \"${.root}/raw\"}})\n    output_file = tmp_dir / \"params.yaml\"\n    compose_and_dump(output_file, str(config.parent), None, \"config\", str(tmp_dir), [])\n    assert output_file.parse() == {\n        \"data\": {\"root\": \"path/to/root\", \"raw\": \"path/to/root/raw\"}\n    }\n\n\ndef test_compose_and_dump_plugins(tmp_dir):\n    \"\"\"Ensure Hydra plugins are loaded.\"\"\"\n    from hydra.core.plugins import Plugins\n\n    from dvc.utils.hydra import compose_and_dump\n\n    # clear cached plugins\n    Plugins._instances.pop(Plugins, None)\n\n    config = tmp_dir / \"conf\" / \"config.yaml\"\n    config.parent.mkdir()\n    config.write_text(\"foo: '${plus_10:1}'\\n\")\n\n    plugins = tmp_dir / \"hydra_plugins\"\n    plugins.mkdir()\n    (plugins / \"resolver.py\").write_text(\n        \"\"\"\\\nfrom omegaconf import OmegaConf\nOmegaConf.register_new_resolver('plus_10', lambda x: x + 10)\"\"\"\n    )\n\n    output_file = tmp_dir / \"params.yaml\"\n    compose_and_dump(output_file, str(config.parent), None, \"config\", str(tmp_dir), [])\n    assert output_file.read_text() == \"foo: 11\\n\"\n\n\n@pytest.mark.parametrize(\n    \"overrides, expected\",\n    [\n        (\n            {\"params.yaml\": [\"defaults/foo=1,2\"]},\n            [\n                {\"params.yaml\": [\"defaults/foo=1\"]},\n                {\"params.yaml\": [\"defaults/foo=2\"]},\n            ],\n        ),\n        (\n            {\"params.yaml\": [\"+foo=1,2\", \"~bar\", \"++foobar=5,6\"]},\n            [\n                {\"params.yaml\": [\"+foo=1\", \"~bar=null\", \"++foobar=5\"]},\n                {\"params.yaml\": [\"+foo=1\", \"~bar=null\", \"++foobar=6\"]},\n 
               {\"params.yaml\": [\"+foo=2\", \"~bar=null\", \"++foobar=5\"]},\n                {\"params.yaml\": [\"+foo=2\", \"~bar=null\", \"++foobar=6\"]},\n            ],\n        ),\n        (\n            {\"params.yaml\": [\"foo=1,2\", \"bar=3,4\"]},\n            [\n                {\"params.yaml\": [\"foo=1\", \"bar=3\"]},\n                {\"params.yaml\": [\"foo=1\", \"bar=4\"]},\n                {\"params.yaml\": [\"foo=2\", \"bar=3\"]},\n                {\"params.yaml\": [\"foo=2\", \"bar=4\"]},\n            ],\n        ),\n        (\n            {\"params.yaml\": [\"foo=choice(1,2)\"]},\n            [{\"params.yaml\": [\"foo=1\"]}, {\"params.yaml\": [\"foo=2\"]}],\n        ),\n        (\n            {\"params.yaml\": [\"foo=range(1, 3)\"]},\n            [{\"params.yaml\": [\"foo=1\"]}, {\"params.yaml\": [\"foo=2\"]}],\n        ),\n        (\n            {\"params.yaml\": [\"foo=1,2\"], \"others.yaml\": [\"bar=3\"]},\n            [\n                {\"params.yaml\": [\"foo=1\"], \"others.yaml\": [\"bar=3\"]},\n                {\"params.yaml\": [\"foo=2\"], \"others.yaml\": [\"bar=3\"]},\n            ],\n        ),\n        (\n            {\"params.yaml\": [\"foo=1,2\"], \"others.yaml\": [\"bar=3,4\"]},\n            [\n                {\"params.yaml\": [\"foo=1\"], \"others.yaml\": [\"bar=3\"]},\n                {\"params.yaml\": [\"foo=1\"], \"others.yaml\": [\"bar=4\"]},\n                {\"params.yaml\": [\"foo=2\"], \"others.yaml\": [\"bar=3\"]},\n                {\"params.yaml\": [\"foo=2\"], \"others.yaml\": [\"bar=4\"]},\n            ],\n        ),\n    ],\n)\ndef test_hydra_sweeps(overrides, expected):\n    from dvc.utils.hydra import get_hydra_sweeps\n\n    assert get_hydra_sweeps(overrides) == expected\n\n\ndef test_invalid_sweep():\n    from dvc.utils.hydra import get_hydra_sweeps\n\n    with pytest.raises(InvalidArgumentError):\n        get_hydra_sweeps({\"params.yaml\": [\"foo=glob(*)\"]})\n"
  },
  {
    "path": "tests/func/utils/test_strict_yaml.py",
    "content": "from os import curdir, pardir, sep\n\nimport pytest\nfrom ruamel.yaml import __with_libyaml__ as ruamel_clib\n\nfrom dvc.cli import main\n\nDUPLICATE_KEYS = \"\"\"\\\nstages:\n  stage1:\n    cmd: python train.py\n    cmd: python train.py\n\"\"\"\n\nDUPLICATE_KEYS_OUTPUT = \"\"\"\\\n'./dvc.yaml' is invalid.\n\nWhile constructing a mapping, in line 3, column 5\n  3 │   cmd: python train.py\n\nFound duplicate key \"cmd\" with value \"python train.py\" (original value:\\\n \"python\\ntrain.py\"), in line 4, column 5\n  4 │   cmd: python train.py\"\"\"\n\n\nMAPPING_VALUES_NOT_ALLOWED = \"\"\"\\\nstages:\n  stage1\n    cmd: python script.py\n\"\"\"\n\nMAPPING_VALUES_NOT_ALLOWED_OUTPUT = \"\"\"\\\n'./dvc.yaml' is invalid.\n\nMapping values are not allowed {}, in line 3, column 8\n  3 │   cmd: python script.py\"\"\".format(\"in this context\" if ruamel_clib else \"here\")\n\n\nNO_HYPHEN_INDICATOR_IN_BLOCK = \"\"\"\\\nstages:\n  stage1:\n    cmd: python script.py\n    outs:\n      - logs:\n          cache: false\n      metrics:\n\"\"\"\n\nNO_HYPHEN_INDICATOR_IN_BLOCK_OUTPUT = \"\"\"\\\n'./dvc.yaml' is invalid.\n\nWhile parsing a block collection, in line 5, column 7\n  5 │     - logs:\n\n{}, in line 7, column 7\n  7 │     metrics:\"\"\".format(\n    \"Did not find expected '-' indicator\"\n    if ruamel_clib\n    else \"Expected <block end>, but found '?'\"\n)\n\n\nUNCLOSED_SCALAR = \"\"\"\\\nstages:\n  stage1:\n    cmd: python script.py\n    desc: \"this is my stage one\n\"\"\"\n\nUNCLOSED_SCALAR_OUTPUT = \"\"\"\\\n'./dvc.yaml' is invalid.\n\nWhile scanning a quoted scalar, in line 4, column 11\n  4 │   desc: \"this is my stage one\n\nFound unexpected end of stream, in line 5, column 1\n  5\"\"\"\n\n\nNOT_A_DICT = \"3\"\nNOT_A_DICT_OUTPUT = \"'./dvc.yaml' validation failed: expected a dictionary.\\n\"\n\n\nEMPTY_STAGE = \"\"\"\\\nstages:\n  stage1:\n\"\"\"\n\nEMPTY_STAGE_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nexpected a dictionary, in stages 
-> stage1, line 2, column 3\n  1 stages:\n  2   stage1:\n  3\"\"\"\n\n\nMISSING_CMD = \"\"\"\\\nstages:\n  stage1:\n    cmd: {}\n\"\"\"\n\nMISSING_CMD_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nexpected str, in stages -> stage1 -> cmd, line 3, column 10\n  2   stage1:\n  3 │   cmd: {}\"\"\"\n\n\nDEPS_AS_DICT = \"\"\"\\\nstages:\n  stage1:\n    cmd: python script.py\n    deps:\n      - src:\n\"\"\"\n\nDEPS_AS_DICT_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nexpected str, in stages -> stage1 -> deps -> 0, line 5, column 9\n  4 │   deps:\n  5 │     - src:\n\"\"\"\n\nOUTS_AS_STR = \"\"\"\\\nstages:\n  train:\n    cmd:\n      - python train.py\n    deps:\n      - config.cfg\n    outs:\n      models/\"\"\"\n\nOUTS_AS_STR_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nexpected a list, in stages -> train -> outs, line 3, column 5\n  2   train:\n  3 │   cmd:\n  4 │     - python train.py\"\"\"\n\n\nNULL_VALUE_ON_OUTS = \"\"\"\\\nstages:\n  stage1:\n    cmd: python script.py\n    outs:\n    - logs:\n        cache: false\n        persist: true\n        remote:\n\"\"\"\n\nNULL_VALUE_ON_OUTS_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nexpected str, in stages -> stage1 -> outs -> 0 -> logs -> remote, line 6, \\\ncolumn\\n9\n  5 │   - logs:\n  6 │   │   cache: false\n  7 │   │   persist: true\"\"\"\n\nADDITIONAL_KEY_ON_OUTS = \"\"\"\\\nstages:\n  stage1:\n    cmd: python script.py\n    outs:\n    - logs:\n        cache: false\n        not_existing_key: false\n\"\"\"\n\nADDITIONAL_KEY_ON_OUTS_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nextra keys not allowed, in stages -> stage1 -> outs -> 0 -> logs ->\nnot_existing_key, line 6, column 9\n  5 │   - logs:\n  6 │   │   cache: false\n  7 │   │   not_existing_key: false\"\"\"\n\n\nFOREACH_SCALAR_VALUE = \"\"\"\\\nstages:\n  group:\n    foreach: 3\n    do:\n      cmd: python script${i}.py\n\"\"\"\n\nFOREACH_SCALAR_VALUE_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nexpected dict, 
in stages -> group -> foreach, line 3, column 5\n  2   group:\n  3 │   foreach: 3\n  4 │   do:\"\"\"\n\nFOREACH_DO_NULL = \"\"\"\\\nstages:\n  stage1:\n    foreach: [1,2,3]\n    do:\n\"\"\"\n\n\nFOREACH_DO_NULL_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nexpected a dictionary, in stages -> stage1 -> do, line 3, column 5\n  2   stage1:\n  3 │   foreach: [1,2,3]\n  4 │   do:\"\"\"\n\n\nFOREACH_DO_MISSING_CMD = \"\"\"\\\nstages:\n  stage1:\n    foreach: [1,2,3]\n    do:\n      outs:\n      - ${item}\n\"\"\"\n\n\nFOREACH_WITH_CMD_DO_MISSING = \"\"\"\\\nstages:\n  stage1:\n    foreach: [1,2,3]\n    cmd: python script${item}.py\n\"\"\"\n\n\nFOREACH_WITH_CMD_DO_MISSING_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed: 2 errors.\n\nextra keys not allowed, in stages -> stage1 -> cmd, line 3, column 5\n  2   stage1:\n  3 │   foreach: [1,2,3]\n  4 │   cmd: python script${item}.py\n\nrequired key not provided, in stages -> stage1 -> do, line 3, column 5\n  2   stage1:\n  3 │   foreach: [1,2,3]\n  4 │   cmd: python script${item}.py\"\"\"\n\n\nFOREACH_DO_MISSING_CMD_OUTPUT = \"\"\"\\\n'./dvc.yaml' validation failed.\n\nrequired key not provided, in stages -> stage1 -> do -> cmd, line 5, column 7\n  4 │   do:\n  5 │     outs:\n  6 │     - ${item}\"\"\"\n\n\nMERGE_CONFLICTS = \"\"\"\\\nstages:\n  load_data:\n<<<<<<< HEAD\n    cmd: python src/load_data.py\n    deps:\n    - src/load_data.py\n=======\n    cmd: python load_data.py\n    deps:\n    - load_data.py\n>>>>>>> branch\n    outs:\n    - data\n\"\"\"\n\nMERGE_CONFLICTS_OUTPUT = \"\"\"\\\n'./dvc.yaml' is invalid (possible merge conflicts).\n\nWhile scanning a simple key, in line 3, column 1\n  3 <<<<<<< HEAD\n\nCould not find expected ':', in line 4, column 8\n  4 │   cmd: python src/load_data.py\"\"\"\n\n\nexamples = {\n    # on parse errors\n    \"duplicate_keys\": (DUPLICATE_KEYS, DUPLICATE_KEYS_OUTPUT),\n    \"mapping_values_not_allowed\": (\n        MAPPING_VALUES_NOT_ALLOWED,\n        
MAPPING_VALUES_NOT_ALLOWED_OUTPUT,\n    ),\n    \"no_hyphen_block\": (\n        NO_HYPHEN_INDICATOR_IN_BLOCK,\n        NO_HYPHEN_INDICATOR_IN_BLOCK_OUTPUT,\n    ),\n    \"unclosed_scalar\": (UNCLOSED_SCALAR, UNCLOSED_SCALAR_OUTPUT),\n    # schema validation errors\n    \"not_a_dict\": (NOT_A_DICT, NOT_A_DICT_OUTPUT),\n    \"empty_stage\": (EMPTY_STAGE, EMPTY_STAGE_OUTPUT),\n    \"missing_cmd\": (MISSING_CMD, MISSING_CMD_OUTPUT),\n    \"deps_as_dict\": (DEPS_AS_DICT, DEPS_AS_DICT_OUTPUT),\n    \"outs_as_str\": (OUTS_AS_STR, OUTS_AS_STR_OUTPUT),\n    \"null_value_on_outs\": (NULL_VALUE_ON_OUTS, NULL_VALUE_ON_OUTS_OUTPUT),\n    \"additional_key_on_outs\": (ADDITIONAL_KEY_ON_OUTS, ADDITIONAL_KEY_ON_OUTS_OUTPUT),\n    \"foreach_scalar\": (FOREACH_SCALAR_VALUE, FOREACH_SCALAR_VALUE_OUTPUT),\n    \"foreach_do_do_null\": (FOREACH_DO_NULL, FOREACH_DO_NULL_OUTPUT),\n    \"foreach_do_missing_cmd\": (FOREACH_DO_MISSING_CMD, FOREACH_DO_MISSING_CMD_OUTPUT),\n    \"foreach_unknown_cmd_missing_do\": (\n        FOREACH_WITH_CMD_DO_MISSING,\n        FOREACH_WITH_CMD_DO_MISSING_OUTPUT,\n    ),\n    # merge conflicts\n    \"merge_conflicts\": (MERGE_CONFLICTS, MERGE_CONFLICTS_OUTPUT),\n}\n\n\n@pytest.fixture\ndef force_posixpath(mocker):\n    # make it always return posix path, easier for validating error messages\n    mocker.patch(\"dvc.utils.strictyaml.make_relpath\", return_value=\"./dvc.yaml\")\n\n\n@pytest.fixture\ndef fixed_width_term(mocker):\n    \"\"\"Fixed width console.\"\"\"\n    from rich.console import Console, ConsoleDimensions\n\n    mocker.patch.object(\n        Console,\n        \"size\",\n        new_callable=mocker.PropertyMock(return_value=ConsoleDimensions(80, 25)),\n    )\n\n\n@pytest.mark.parametrize(\"text, expected\", examples.values(), ids=examples.keys())\ndef test_exceptions(\n    tmp_dir, dvc, capsys, force_posixpath, fixed_width_term, text, expected\n):\n    tmp_dir.gen(\"dvc.yaml\", text)\n\n    capsys.readouterr()  # clear outputs\n    assert 
main([\"stage\", \"list\"]) != 0\n    out, err = capsys.readouterr()\n\n    assert not out\n\n    # strip whitespace on the right: output is always left-justified\n    # by rich.syntax.Syntax:\n    for expected_line, err_line in zip(expected.splitlines(), err.splitlines()):\n        assert expected_line == err_line.rstrip(\" \")\n\n\n@pytest.mark.parametrize(\n    \"text, expected\",\n    [\n        (DUPLICATE_KEYS, \"'./dvc.yaml' is invalid in revision '{short_rev}'.\"),\n        (MISSING_CMD, \"'./dvc.yaml' validation failed in revision '{short_rev}'.\"),\n    ],\n)\ndef test_on_revision(\n    tmp_dir, scm, dvc, force_posixpath, fixed_width_term, capsys, text, expected\n):\n    tmp_dir.scm_gen(\"dvc.yaml\", text, commit=\"add dvc.yaml\")\n    capsys.readouterr()  # clear outputs\n\n    assert main([\"ls\", f\"file://{tmp_dir.as_posix()}\", \"--rev\", \"HEAD\"]) != 0\n\n    out, err = capsys.readouterr()\n    assert not out\n    assert expected.format(short_rev=scm.get_rev()[:7]) in err\n\n\ndef test_make_relpath(tmp_dir, dvc, scm, monkeypatch):\n    from dvc.utils.strictyaml import make_relpath\n\n    gitfs = scm.get_fs(\"HEAD\")\n    path = tmp_dir / \"dvc.yaml\"\n    expected_path = curdir + sep + \"dvc.yaml\"\n    assert make_relpath(path) == expected_path\n    assert make_relpath(\"/dvc.yaml\", gitfs) == expected_path\n\n    (tmp_dir / \"dir\").mkdir(exist_ok=True)\n    monkeypatch.chdir(\"dir\")\n    gitfs.chdir(\"/dir\")\n\n    expected_path = pardir + sep + \"dvc.yaml\"\n    assert make_relpath(path) == expected_path\n    assert make_relpath(\"/dvc.yaml\", gitfs) == expected_path\n\n\ndef test_fallback_exception_message(tmp_dir, dvc, mocker, caplog):\n    # When pretty-printing an exception message fails, we fall back to the old\n    # way of printing things.\n    mocker.patch(\n        \"dvc.utils.strictyaml.YAMLSyntaxError.__pretty_exc__\", side_effect=ValueError\n    )\n    mocker.patch(\n        \"dvc.utils.strictyaml.YAMLValidationError.__pretty_exc__\",\n  
      side_effect=ValueError,\n    )\n\n    # syntax errors\n    dvc_file = tmp_dir / \"dvc.yaml\"\n    dvc_file.write_text(MAPPING_VALUES_NOT_ALLOWED)\n    assert main([\"stage\", \"list\"]) != 0\n    assert \"unable to read: 'dvc.yaml', YAML file structure is corrupted\" in caplog.text\n\n    caplog.clear()\n    # validation error\n    dvc_file.dump({\"stages\": {\"stage1\": None}})\n    assert main([\"stage\", \"list\"]) != 0\n    assert \"dvc.yaml' validation failed\" in caplog.text\n"
  },
  {
    "path": "tests/integration/__init__.py",
    "content": ""
  },
  {
    "path": "tests/integration/conftest.py",
    "content": "from tests.unit.repo.experiments.conftest import exp_stage, test_queue  # noqa: F401\n"
  },
  {
    "path": "tests/integration/plots/__init__.py",
    "content": ""
  },
  {
    "path": "tests/integration/plots/conftest.py",
    "content": "import pytest\n\n\n@pytest.fixture\ndef repo_with_plots(tmp_dir, scm, dvc, run_copy_metrics):\n    def make():\n        linear_v1 = [\n            {\"x\": 1, \"y\": 0.1},\n            {\"x\": 2, \"y\": 0.2},\n            {\"x\": 3, \"y\": 0.3},\n        ]\n\n        confusion_v1 = [\n            {\"actual\": 0, \"predicted\": 1},\n            {\"actual\": 0, \"predicted\": 1},\n            {\"actual\": 1, \"predicted\": 0},\n            {\"actual\": 1, \"predicted\": 0},\n        ]\n\n        image_v1 = b\"content\"\n\n        (tmp_dir / \"linear_src.json\").dump_json(linear_v1)\n        (tmp_dir / \"confusion_src.json\").dump_json(confusion_v1)\n        (tmp_dir / \"image_src.png\").write_bytes(image_v1)\n\n        scm.add([\"linear_src.json\", \"confusion_src.json\", \"image_src.png\"])\n        scm.commit(\"add data sources\")\n\n        run_copy_metrics(\n            \"linear_src.json\",\n            \"linear.json\",\n            name=\"linear\",\n            plots=[\"linear.json\"],\n            commit=\"linear\",\n        )\n        run_copy_metrics(\n            \"confusion_src.json\",\n            \"confusion.json\",\n            name=\"confusion\",\n            plots=[\"confusion.json\"],\n            commit=\"confusion\",\n        )\n        linear_props = {\"title\": \"linear\", \"x\": \"x\"}\n        dvc.plots.modify(\"linear.json\", linear_props)\n        confusion_props = {\n            \"title\": \"confusion matrix\",\n            \"x\": \"predicted\",\n            \"y\": \"actual\",\n            \"template\": \"confusion\",\n        }\n        dvc.plots.modify(\"confusion.json\", confusion_props)\n        run_copy_metrics(\n            \"image_src.png\",\n            \"image.png\",\n            name=\"image\",\n            plots=[\"image.png\"],\n            commit=\"image\",\n        )\n\n        scm.add([\"dvc.yaml\", \"dvc.lock\"])\n        scm.commit(\"commit dvc files\")\n        yield image_v1, linear_v1, confusion_v1, 
confusion_props\n        linear_v2 = [\n            {\"x\": 1, \"y\": 0.2},\n            {\"x\": 2, \"y\": 0.3},\n            {\"x\": 3, \"y\": 0.4},\n        ]\n        confusion_v2 = [\n            {\"actual\": 0, \"predicted\": 0},\n            {\"actual\": 0, \"predicted\": 0},\n            {\"actual\": 1, \"predicted\": 1},\n            {\"actual\": 1, \"predicted\": 1},\n        ]\n        image_v2 = b\"content2\"\n\n        (tmp_dir / \"linear_src.json\").dump_json(linear_v2)\n        (tmp_dir / \"confusion_src.json\").dump_json(confusion_v2)\n        (tmp_dir / \"image_src.png\").write_bytes(image_v2)\n\n        dvc.reproduce()\n        yield image_v2, linear_v2, confusion_v2, confusion_props\n\n    return make\n\n\n@pytest.fixture\ndef repo_with_config_plots(tmp_dir, scm, dvc, run_copy_metrics):\n    def make():\n        # test subdir functionality\n        linear_subdir_a_v1 = [\n            {\"x\": 1, \"y\": 0.2},\n            {\"x\": 2, \"y\": 0.3},\n            {\"x\": 3, \"y\": 0.4},\n        ]\n        # test subdir default values for x = step\n        linear_subdir_b_v1 = [\n            {\"step\": 1, \"y\": 0.2},\n            {\"step\": 2, \"y\": 0.3},\n            {\"step\": 3, \"y\": 0.4},\n        ]\n        linear_train_v1 = [\n            {\"x\": 1, \"y\": 0.1},\n            {\"x\": 2, \"y\": 0.2},\n            {\"x\": 3, \"y\": 0.3},\n        ]\n        linear_test_v1 = [\n            {\"x\": 1, \"y\": 0.3},\n            {\"x\": 2, \"y\": 0.4},\n            {\"x\": 3, \"y\": 0.5},\n        ]\n\n        confusion_train_v1 = [\n            {\"actual\": 0, \"predicted\": 1},\n            {\"actual\": 0, \"predicted\": 1},\n            {\"actual\": 1, \"predicted\": 0},\n            {\"actual\": 1, \"predicted\": 0},\n        ]\n        confusion_test_v1 = [\n            {\"actual\": 0, \"predicted\": 1},\n            {\"actual\": 0, \"predicted\": 0},\n            {\"actual\": 1, \"predicted\": 1},\n            {\"actual\": 1, \"predicted\": 
0},\n        ]\n\n        (tmp_dir / \"linear_subdir_a_src.json\").dump_json(linear_subdir_a_v1)\n        (tmp_dir / \"linear_subdir_b_src.json\").dump_json(linear_subdir_b_v1)\n        (tmp_dir / \"linear_train_src.json\").dump_json(linear_train_v1)\n        (tmp_dir / \"linear_test_src.json\").dump_json(linear_test_v1)\n        (tmp_dir / \"confusion_train_src.json\").dump_json(confusion_train_v1)\n        (tmp_dir / \"confusion_test_src.json\").dump_json(confusion_test_v1)\n\n        scm.add(\n            [\n                \"linear_subdir_a_src.json\",\n                \"linear_subdir_b_src.json\",\n                \"linear_train_src.json\",\n                \"linear_test_src.json\",\n                \"confusion_train_src.json\",\n                \"confusion_test_src.json\",\n            ]\n        )\n        scm.commit(\"add data sources\")\n\n        (tmp_dir / \"subdirA\").mkdir()\n        (tmp_dir / \"subdirB\").mkdir()\n\n        run_copy_metrics(\n            \"linear_subdir_a_src.json\",\n            \"subdirA/linear_subdir.json\",\n            name=\"linear_subdir\",\n            outs=[\"subdirA/linear_subdir.json\"],\n            commit=\"linear_subdir\",\n        )\n        run_copy_metrics(\n            \"linear_subdir_b_src.json\",\n            \"subdirB/linear_subdir.json\",\n            name=\"linear_subdir\",\n            outs=[\"subdirB/linear_subdir.json\"],\n            commit=\"linear_subdir\",\n        )\n        run_copy_metrics(\n            \"linear_train_src.json\",\n            \"linear_train.json\",\n            name=\"linear_train\",\n            outs=[\"linear_train.json\"],\n            commit=\"linear_train\",\n        )\n        run_copy_metrics(\n            \"linear_test_src.json\",\n            \"linear_test.json\",\n            name=\"linear_test\",\n            outs=[\"linear_test.json\"],\n            commit=\"linear_test\",\n        )\n        run_copy_metrics(\n            \"confusion_train_src.json\",\n            
\"confusion_train.json\",\n            name=\"confusion_train\",\n            outs=[\"confusion_train.json\"],\n            commit=\"confusion_train\",\n        )\n        run_copy_metrics(\n            \"confusion_test_src.json\",\n            \"confusion_test.json\",\n            name=\"confusion_test\",\n            outs=[\"confusion_test.json\"],\n            commit=\"confusion_test\",\n        )\n\n        subdir_a_config = {\n            \"x\": \"x\",\n            \"y\": \"y\",\n            \"title\": \"subdir plots with x and y defined\",\n        }\n\n        subdir_b_config = {\"title\": \"subdir plots with default x and y\"}\n\n        other_plots_config = [\n            {\n                \"linear_train_vs_test\": {\n                    \"x\": \"x\",\n                    \"y\": {\"linear_train.json\": \"y\", \"linear_test.json\": \"y\"},\n                    \"title\": \"linear plot\",\n                }\n            },\n            {\n                \"confusion_train_vs_test\": {\n                    \"x\": \"actual\",\n                    \"y\": {\n                        \"confusion_train.json\": \"predicted\",\n                        \"confusion_test.json\": \"predicted\",\n                    },\n                    \"template\": \"confusion\",\n                }\n            },\n        ]\n\n        from dvc.utils.serialize import modify_yaml\n\n        with modify_yaml(\"dvc.yaml\") as dvcfile_content:\n            dvcfile_content[\"plots\"] = [\n                {\"subdirA\": subdir_a_config},\n                {\"subdirB\": subdir_b_config},\n                *other_plots_config,\n            ]\n\n        scm.add([\"dvc.yaml\", \"dvc.lock\"])\n        scm.commit(\"commit dvc files\")\n\n        # remove generated .gitignore files that would otherwise be considered plots\n        (tmp_dir / \"subdirA\" / \".gitignore\").unlink()\n        (tmp_dir / \"subdirB\" / \".gitignore\").unlink()\n\n        yield {\n            \"data\": {\n                
\"subdirA/linear_subdir.json\": linear_subdir_a_v1,\n                \"subdirB/linear_subdir.json\": linear_subdir_b_v1,\n                \"linear_train.json\": linear_train_v1,\n                \"linear_test.json\": linear_test_v1,\n                \"confusion_train.json\": confusion_train_v1,\n                \"confusion_test.json\": confusion_test_v1,\n            },\n            \"configs\": {\n                \"dvc.yaml\": [\n                    {\"subdirA/linear_subdir.json\": subdir_a_config},\n                    {\"subdirB/linear_subdir.json\": subdir_b_config},\n                    *other_plots_config,\n                ]\n            },\n        }\n\n    return make\n\n\n@pytest.fixture\ndef repo_with_dvclive_plots(tmp_dir, scm, dvc, run_copy_metrics):\n    def make():\n        metrics = [\n            {\"step\": 1, \"metric\": 0.1},\n            {\"step\": 2, \"metric\": 0.2},\n            {\"step\": 3, \"metric\": 0.3},\n        ]\n\n        metrics_path = tmp_dir / \"dvclive\" / \"plots\" / \"metrics\" / \"metric.tsv\"\n        metrics_path.parent.mkdir(parents=True)\n        metrics_path.dump_json(metrics)\n\n        plots_config_v1 = [{\"plots/metrics\": {\"x\": \"step\"}}]\n\n        from dvc.utils.serialize import modify_yaml\n\n        dvcyaml_path_v1 = tmp_dir / \"dvclive\" / \"dvc.yaml\"\n        with modify_yaml(dvcyaml_path_v1) as dvcfile_content:\n            dvcfile_content[\"plots\"] = plots_config_v1\n        scm.add([metrics_path, dvcyaml_path_v1])\n        scm.commit(\"add dvclive 2.x plots\")\n\n        yield {\n            \"data\": {metrics_path: metrics},\n            \"configs\": {dvcyaml_path_v1: plots_config_v1},\n        }\n\n        plots_config_v2 = [{\"dvclive/plots/metrics\": {\"x\": \"step\"}}]\n\n        dvcyaml_path_v1.unlink()\n        dvcyaml_path_v2 = tmp_dir / \"dvc.yaml\"\n        with modify_yaml(dvcyaml_path_v2) as dvcfile_content:\n            dvcfile_content[\"plots\"] = plots_config_v2\n\n        yield {\n        
    \"data\": {metrics_path: metrics},\n            \"configs\": {dvcyaml_path_v2: plots_config_v2},\n        }\n\n    return make\n"
  },
  {
    "path": "tests/integration/plots/test_plots.py",
    "content": "import json\nimport os\nfrom copy import deepcopy\nfrom urllib.parse import urlparse\nfrom urllib.request import url2pathname\n\nimport dpath\nimport pytest\nfrom bs4 import BeautifulSoup\nfrom funcy import first\n\nfrom dvc.cli import main\nfrom dvc.render import ANCHOR_DEFINITIONS, FILENAME, REVISION\n\nJSON_OUT = \"vis_data\"\n\n\ndef call(capsys, subcommand=\"show\"):\n    capsys.readouterr()\n    assert main([\"plots\", subcommand, \"--json\", \"-o\", JSON_OUT, \"--split\"]) == 0\n    split_json_out, _ = capsys.readouterr()\n\n    split_json_result = json.loads(split_json_out)\n\n    capsys.readouterr()\n    assert main([\"plots\", subcommand, \"--json\", \"-o\", JSON_OUT]) == 0\n    json_out, _ = capsys.readouterr()\n\n    json_result = json.loads(json_out)\n\n    assert main([\"plots\", subcommand]) == 0\n    html_path_out, _ = capsys.readouterr()\n\n    parsed = urlparse(html_path_out.strip())\n    abspath = url2pathname(parsed.path)\n    return abspath, json_result, split_json_result\n\n\ndef extract_vega_specs(html_path, plots_ids):\n    from dvc_render.base import Renderer\n\n    result = {}\n\n    with open(html_path, encoding=\"utf-8\") as fd:\n        content = fd.read()\n\n    reader = BeautifulSoup(content, features=\"html.parser\")\n    for plot_id in plots_ids:\n        script = _remove_blanks(\n            reader.find(\"div\", id=Renderer.remove_special_chars(plot_id)).script.text\n        )\n        result[plot_id] = json.loads(\n            script.split(\"; vegaEmbed\")[0].replace(\"var spec = \", \"\")\n        )\n\n    return result\n\n\ndef drop_fields(datapoints: list[dict], fields: list[str]):\n    tmp = deepcopy(datapoints)\n    for datapoint in tmp:\n        keys = set(datapoint.keys())\n        for key in keys:\n            if key in fields:\n                datapoint.pop(key)\n    return tmp\n\n\ndef verify_image(path, version, filename, content, html_path, json_result):\n    assert os.path.exists(html_path)\n    with 
open(html_path, encoding=\"utf-8\") as fd:\n        html_content = fd.read()\n\n    image_data = {}\n    for datapoint in json_result[filename]:\n        if datapoint[\"revisions\"] == [version]:\n            image_data = datapoint\n            break\n\n    assert image_data, f\"{version} data for {filename} was not found\"\n    assert image_data[\"type\"] == \"image\"\n    output_filename = filename.replace(\"/\", \"_\")\n    output_name = f\"{version}_{output_filename}\"\n    assert image_data[\"url\"] == str(path / JSON_OUT / output_name)\n    assert (path / JSON_OUT / output_name).read_bytes() == content\n\n    assert os.path.join(\"static\", output_name) in html_content\n\n    # there should be no absolute paths in produced HTML\n    # TODO uncomment once dvc-render is adjusted\n    # assert str(path) not in html_content\n    assert (path / \"dvc_plots\" / \"static\" / output_name).read_bytes() == content\n\n\ndef _remove_blanks(text: str):\n    return \" \".join(text.replace(\"\\t\", \"\").replace(\"\\n\", \"\").split())\n\n\ndef verify_vega(\n    versions, html_result, json_result, split_json_result, title, x_label, y_label\n):\n    if isinstance(versions, str):\n        versions = [versions]\n\n    for j in [json_result, split_json_result]:\n        assert len(j) == 1\n        assert j[0][\"type\"] == \"vega\"\n        assert set(j[0][\"revisions\"]) == set(versions)\n\n    assert (\n        json_result[0][\"content\"][\"data\"][\"values\"]\n        == split_json_result[0][ANCHOR_DEFINITIONS][\"<DVC_METRIC_DATA>\"]\n    )\n\n    assert set(versions) == set(json_result[0][\"revisions\"])\n\n    assert json_result[0][\"content\"][\"data\"][\"values\"]\n    assert html_result[\"data\"][\"values\"]\n\n    content_str = json.dumps(split_json_result[0][\"content\"])\n    assert \"<DVC_METRIC_DATA>\" in content_str\n    assert \"<DVC_METRIC_X_LABEL>\" in content_str\n    assert \"<DVC_METRIC_Y_LABEL>\" in content_str\n\n    def _assert_templates_equal(\n        
html_template, filled_template, split_template, title, x_label, y_label\n    ):\n        # besides split anchors, json and split json should be equal\n        paths = [[\"data\", \"values\"], [\"encoding\", \"color\"]]\n        tmp1 = deepcopy(html_template)\n        tmp2 = deepcopy(filled_template)\n        tmp3 = json.loads(\n            json.dumps(split_template)\n            .replace('\"<DVC_METRIC_PLOT_HEIGHT>\"', \"300\")\n            .replace('\"<DVC_METRIC_PLOT_WIDTH>\"', \"300\")\n            .replace(\"<DVC_METRIC_TITLE>\", title)\n            .replace(\"<DVC_METRIC_X_LABEL>\", x_label)\n            .replace(\"<DVC_METRIC_Y_LABEL>\", y_label)\n            .replace(\n                '\"<DVC_METRIC_ZOOM_AND_PAN>\"',\n                json.dumps({\"name\": \"grid\", \"select\": \"interval\", \"bind\": \"scales\"}),\n            )\n        )\n        for path in paths:\n            dpath.set(tmp1, path, {})\n            dpath.set(tmp2, path, {})\n            dpath.set(tmp3, path, {})\n\n        assert tmp1 == tmp2 == tmp3\n\n    _assert_templates_equal(\n        html_result,\n        json_result[0][\"content\"],\n        split_json_result[0][\"content\"],\n        title,\n        x_label,\n        y_label,\n    )\n\n\ndef verify_vega_props(plot_id, json_result, title, x, y, **kwargs):\n    data = json_result[plot_id]\n    assert len(data) == 1\n    data = first(data)\n\n    assert dpath.get(data, [\"content\", \"title\", \"text\"]) == title\n\n    try:\n        # TODO confusion_matrix_plot - need to find better way of asserting\n        #      encoding as its place is not constant in vega\n        plot_x = dpath.get(data, [\"content\", \"spec\", \"encoding\", \"x\", \"field\"])\n        plot_y = dpath.get(data, [\"content\", \"spec\", \"encoding\", \"y\", \"field\"])\n    except KeyError:\n        # default plot\n        plot_x = dpath.get(data, [\"content\", \"layer\", 0, \"encoding\", \"x\", \"field\"])\n        plot_y = dpath.get(data, [\"content\", 
\"layer\", 0, \"encoding\", \"y\", \"field\"])\n\n    assert plot_x == x\n    assert plot_y == y\n\n\ndef _update_datapoints(datapoints: list, update: dict):\n    result = []\n    for dp in datapoints:\n        tmp = dp.copy()\n        tmp.update(update)\n        result.append(tmp)\n    return result\n\n\n@pytest.mark.vscode\ndef test_no_plots(tmp_dir, scm, dvc, capsys):\n    html_path, json_result, split_json_result = call(capsys)\n    assert not os.path.exists(html_path)\n    assert json_result == {}\n    assert split_json_result == {}\n\n\n@pytest.mark.vscode\ndef test_repo_with_plots(tmp_dir, scm, dvc, capsys, run_copy_metrics, repo_with_plots):\n    repo_state = repo_with_plots()\n\n    image_v1, linear_v1, confusion_v1, confusion_props = next(repo_state)\n\n    html_path, json_result, split_json_result = call(capsys)\n    html_result = extract_vega_specs(html_path, [\"linear.json\", \"confusion.json\"])\n\n    assert \"errors\" not in json_result\n    assert \"errors\" not in split_json_result\n\n    json_data = json_result[\"data\"]\n    split_json_data = split_json_result[\"data\"]\n\n    assert json_data[\"linear.json\"][0][\"content\"][\"data\"][\n        \"values\"\n    ] == _update_datapoints(\n        linear_v1,\n        {\n            REVISION: \"workspace\",\n        },\n    )\n    assert html_result[\"linear.json\"][\"data\"][\"values\"] == _update_datapoints(\n        linear_v1,\n        {\n            REVISION: \"workspace\",\n        },\n    )\n    assert json_data[\"confusion.json\"][0][\"content\"][\"data\"][\n        \"values\"\n    ] == _update_datapoints(\n        confusion_v1,\n        {\n            REVISION: \"workspace\",\n        },\n    )\n    assert html_result[\"confusion.json\"][\"data\"][\"values\"] == _update_datapoints(\n        confusion_v1,\n        {\n            REVISION: \"workspace\",\n        },\n    )\n    verify_image(tmp_dir, \"workspace\", \"image.png\", image_v1, html_path, json_data)\n\n    for plot, title, x_label, 
y_label in [\n        (\"linear.json\", \"linear\", \"x\", \"y\"),\n        (\"confusion.json\", \"confusion matrix\", \"predicted\", \"actual\"),\n    ]:\n        verify_vega(\n            \"workspace\",\n            html_result[plot],\n            json_data[plot],\n            split_json_data[plot],\n            title,\n            x_label,\n            y_label,\n        )\n\n    verify_vega_props(\"confusion.json\", json_data, **confusion_props)\n\n    image_v2, linear_v2, confusion_v2, confusion_props = next(repo_state)\n\n    html_path, json_result, split_json_result = call(capsys, subcommand=\"diff\")\n    html_result = extract_vega_specs(html_path, [\"linear.json\", \"confusion.json\"])\n\n    assert \"errors\" not in json_result\n    assert \"errors\" not in split_json_result\n\n    json_data = json_result[\"data\"]\n    split_json_data = split_json_result[\"data\"]\n\n    verify_image(tmp_dir, \"workspace\", \"image.png\", image_v2, html_path, json_data)\n    verify_image(tmp_dir, \"HEAD\", \"image.png\", image_v1, html_path, json_data)\n\n    for plot, title, x_label, y_label in [\n        (\"linear.json\", \"linear\", \"x\", \"y\"),\n        (\"confusion.json\", \"confusion matrix\", \"predicted\", \"actual\"),\n    ]:\n        verify_vega(\n            [\"HEAD\", \"workspace\"],\n            html_result[plot],\n            json_data[plot],\n            split_json_data[plot],\n            title,\n            x_label,\n            y_label,\n        )\n    verify_vega_props(\"confusion.json\", json_data, **confusion_props)\n    path = tmp_dir / \"subdir\"\n    path.mkdir()\n    with path.chdir():\n        html_path, json_result, split_json_result = call(capsys, subcommand=\"diff\")\n        html_result = extract_vega_specs(\n            html_path,\n            [\"../linear.json\", \"../confusion.json\"],\n        )\n\n        assert \"errors\" not in json_result\n        assert \"errors\" not in split_json_result\n\n        json_data = 
json_result[\"data\"]\n        split_json_data = split_json_result[\"data\"]\n        assert json_data[\"../linear.json\"][0][\"content\"][\"data\"][\n            \"values\"\n        ] == _update_datapoints(\n            linear_v2,\n            {\n                REVISION: \"workspace\",\n            },\n        ) + _update_datapoints(\n            linear_v1,\n            {\n                REVISION: \"HEAD\",\n            },\n        )\n        assert html_result[\"../linear.json\"][\"data\"][\"values\"] == _update_datapoints(\n            linear_v2,\n            {\n                REVISION: \"workspace\",\n            },\n        ) + _update_datapoints(\n            linear_v1,\n            {\n                REVISION: \"HEAD\",\n            },\n        )\n        assert json_data[\"../confusion.json\"][0][\"content\"][\"data\"][\n            \"values\"\n        ] == _update_datapoints(\n            confusion_v2,\n            {\n                REVISION: \"workspace\",\n            },\n        ) + _update_datapoints(\n            confusion_v1,\n            {\n                REVISION: \"HEAD\",\n            },\n        )\n        assert html_result[\"../confusion.json\"][\"data\"][\"values\"] == _update_datapoints(\n            confusion_v2,\n            {\n                REVISION: \"workspace\",\n            },\n        ) + _update_datapoints(\n            confusion_v1,\n            {\n                REVISION: \"HEAD\",\n            },\n        )\n\n        for plot, title, x_label, y_label in [\n            (\"../linear.json\", \"linear\", \"x\", \"y\"),\n            (\"../confusion.json\", \"confusion matrix\", \"predicted\", \"actual\"),\n        ]:\n            verify_vega(\n                [\"HEAD\", \"workspace\"],\n                html_result[plot],\n                json_data[plot],\n                split_json_data[plot],\n                title,\n                x_label,\n                y_label,\n            )\n        verify_image(path, \"workspace\", 
\"../image.png\", image_v2, html_path, json_data)\n        verify_image(path, \"HEAD\", \"../image.png\", image_v1, html_path, json_data)\n\n\n@pytest.mark.vscode\ndef test_repo_with_removed_plots(tmp_dir, capsys, repo_with_plots):\n    from dvc.utils.fs import remove\n\n    next(repo_with_plots())\n\n    # even if there is no data, call should be successful\n    remove(tmp_dir / \".dvc\" / \"cache\")\n    remove(\"linear.json\")\n    remove(\"confusion.json\")\n    remove(\"image.png\")\n\n    for s in (\"show\", \"diff\"):\n        _, json_result, split_json_result = call(capsys, subcommand=s)\n        errors = [\n            {\n                \"name\": p,\n                \"source\": p,\n                \"rev\": \"workspace\",\n                \"type\": \"FileNotFoundError\",\n                \"msg\": f\"[Errno 2] No storage files available: '{p}'\",\n            }\n            for p in [\n                \"linear.json\",\n                \"confusion.json\",\n                \"image.png\",\n            ]\n        ]\n        expected_result = {\n            \"errors\": errors,\n            \"data\": {\n                \"image.png\": [],\n                \"confusion.json\": [],\n                \"linear.json\": [],\n            },\n        }\n        assert json_result == expected_result\n        assert split_json_result == expected_result\n\n\ndef test_config_output_dir(tmp_dir, dvc, capsys):\n    subdir = tmp_dir / \"subdir\"\n    ret = main([\"config\", \"plots.out_dir\", os.fspath(subdir)])\n    assert ret == 0\n\n    metric = [{\"first_val\": 100, \"val\": 2}, {\"first_val\": 200, \"val\": 3}]\n    (tmp_dir / \"metric.json\").dump_json(metric, sort_keys=True)\n\n    assert main([\"plots\", \"show\", \"metric.json\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert subdir.as_uri() in out\n    assert subdir.is_dir()\n    assert (subdir / \"index.html\").is_file()\n\n    cli_arg_subdir = tmp_dir / \"cli_option\"\n    assert main([\"plots\", \"show\", 
\"-o\", os.fspath(cli_arg_subdir), \"metric.json\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert cli_arg_subdir.as_uri() in out\n    assert cli_arg_subdir.is_dir()\n    assert (cli_arg_subdir / \"index.html\").is_file()\n\n\n@pytest.mark.vscode\ndef test_repo_with_config_plots(tmp_dir, capsys, repo_with_config_plots):\n    repo_state = repo_with_config_plots()\n    plots = next(repo_state)\n\n    html_path, _, split_json_result = call(capsys)\n\n    assert os.path.exists(html_path)\n    html_result = extract_vega_specs(\n        html_path,\n        [\n            \"subdirA/linear_subdir.json\",\n            \"subdirB/linear_subdir.json\",\n            \"linear_train_vs_test\",\n            \"confusion_train_vs_test\",\n        ],\n    )\n\n    ble = _update_datapoints(\n        plots[\"data\"][\"linear_train.json\"],\n        {\n            REVISION: \"workspace\",\n            FILENAME: \"linear_train.json\",\n        },\n    ) + _update_datapoints(\n        plots[\"data\"][\"linear_test.json\"],\n        {\n            REVISION: \"workspace\",\n            FILENAME: \"linear_test.json\",\n        },\n    )\n\n    assert html_result[\"linear_train_vs_test\"][\"data\"][\"values\"] == ble\n    assert (\n        split_json_result[\"data\"][\"linear_train_vs_test\"][0][ANCHOR_DEFINITIONS][\n            \"<DVC_METRIC_DATA>\"\n        ]\n        == ble\n    )\n    assert (\n        html_result[\"subdirA/linear_subdir.json\"][\"layer\"][1][\"encoding\"][\"x\"][\"field\"]\n        == \"x\"\n    )\n    assert (\n        html_result[\"subdirA/linear_subdir.json\"][\"layer\"][1][\"encoding\"][\"y\"][\"field\"]\n        == \"y\"\n    )\n    assert (\n        html_result[\"subdirB/linear_subdir.json\"][\"layer\"][1][\"encoding\"][\"x\"][\"field\"]\n        == \"step\"\n    )\n    assert (\n        html_result[\"subdirB/linear_subdir.json\"][\"layer\"][1][\"encoding\"][\"y\"][\"field\"]\n        == \"y\"\n    )\n\n\n@pytest.mark.vscode\ndef 
test_repo_with_dvclive_plots(tmp_dir, capsys, repo_with_dvclive_plots):\n    next(repo_with_dvclive_plots())\n\n    for s in (\"show\", \"diff\"):\n        _, json_result, split_json_result = call(capsys, subcommand=s)\n        expected_result: dict[str, dict[str, list[str]]] = {\n            \"data\": {\n                \"dvclive/plots/metrics/metric.tsv\": [],\n            },\n        }\n        assert json_result == expected_result\n        assert split_json_result == expected_result\n\n\n@pytest.mark.vscode\ndef test_nested_x_defn_collection(tmp_dir, dvc, scm, capsys):\n    rel_pipeline_dir = \"pipelines/data-increment\"\n    pipeline_rel_dvclive_metrics_dir = \"dvclive/plots/metrics\"\n    pipeline_rel_other_logger_dir = \"other/logger\"\n\n    dvc_rel_dvclive_metrics_dir = (\n        f\"{rel_pipeline_dir}/{pipeline_rel_dvclive_metrics_dir}\"\n    )\n    dvc_rel_other_logger_dir = f\"{rel_pipeline_dir}/{pipeline_rel_other_logger_dir}\"\n\n    pipeline_dir = tmp_dir / rel_pipeline_dir\n    dvclive_metrics_dir = pipeline_dir / pipeline_rel_dvclive_metrics_dir\n    dvclive_metrics_dir.mkdir(parents=True)\n    other_logger_dir = pipeline_dir / pipeline_rel_other_logger_dir\n    other_logger_dir.mkdir(parents=True)\n\n    (pipeline_dir / \"dvc.yaml\").dump(\n        {\n            \"plots\": [\n                {\n                    \"Error vs max_leaf_nodes\": {\n                        \"template\": \"simple\",\n                        \"x\": {\n                            f\"{pipeline_rel_dvclive_metrics_dir}\"\n                            \"/Max_Leaf_Nodes.tsv\": \"Max_Leaf_Nodes\"\n                        },\n                        \"y\": {f\"{pipeline_rel_dvclive_metrics_dir}/Error.tsv\": \"Error\"},\n                    }\n                },\n                {\n                    f\"{pipeline_rel_other_logger_dir}/multiple_metrics.json\": {\n                        \"x\": \"x\",\n                        \"y\": [\"y1\", \"y2\"],\n                    },\n    
            },\n                {\n                    f\"{pipeline_rel_dvclive_metrics_dir}/Error.tsv\": {\"y\": [\"Error\"]},\n                },\n                {\n                    \"max leaf nodes\": {\n                        \"y\": {\n                            f\"{pipeline_rel_dvclive_metrics_dir}\"\n                            \"/Max_Leaf_Nodes.tsv\": \"Max_Leaf_Nodes\"\n                        }\n                    },\n                },\n            ]\n        },\n    )\n    dvclive_metrics_dir.gen(\n        {\n            \"Error.tsv\": \"step\\tError\\n0\\t0.11\\n1\\t0.22\\n2\\t0.44\\n\",\n            \"Max_Leaf_Nodes.tsv\": \"step\\tMax_Leaf_Nodes\\n0\\t5\\n1\\t50\\n2\\t500\\n\",\n        }\n    )\n    (other_logger_dir / \"multiple_metrics.json\").dump(\n        [\n            {\"x\": 0, \"y1\": 0.1, \"y2\": 10},\n            {\"x\": 1, \"y1\": 0.2, \"y2\": 22},\n        ]\n    )\n\n    scm.commit(\"add dvc.yaml and metrics\")\n\n    _, _, split_json_result = call(capsys, subcommand=\"diff\")\n    assert len(split_json_result.keys()) == 1\n    assert len(split_json_result[\"data\"].keys()) == 4\n\n    separate_x_file = split_json_result[\"data\"][\"Error vs max_leaf_nodes\"][0]\n\n    assert separate_x_file[\"anchor_definitions\"][\"<DVC_METRIC_DATA>\"] == [\n        {\"Error\": \"0.11\", \"Max_Leaf_Nodes\": \"5\", \"step\": \"0\", \"rev\": \"workspace\"},\n        {\"Error\": \"0.22\", \"Max_Leaf_Nodes\": \"50\", \"step\": \"1\", \"rev\": \"workspace\"},\n        {\"Error\": \"0.44\", \"Max_Leaf_Nodes\": \"500\", \"step\": \"2\", \"rev\": \"workspace\"},\n    ]\n\n    same_x_file = split_json_result[\"data\"][\n        f\"{dvc_rel_other_logger_dir}/multiple_metrics.json\"\n    ][0]\n    assert same_x_file[\"anchor_definitions\"][\"<DVC_METRIC_DATA>\"] == [\n        {\n            \"x\": 0,\n            \"y1\": 0.1,\n            \"y2\": 10,\n            \"dvc_inferred_y_value\": 0.1,\n            \"field\": \"y1\",\n            \"rev\": 
\"workspace\",\n        },\n        {\n            \"x\": 1,\n            \"y1\": 0.2,\n            \"y2\": 22,\n            \"dvc_inferred_y_value\": 0.2,\n            \"field\": \"y1\",\n            \"rev\": \"workspace\",\n        },\n        {\n            \"x\": 0,\n            \"y1\": 0.1,\n            \"y2\": 10,\n            \"dvc_inferred_y_value\": 10,\n            \"field\": \"y2\",\n            \"rev\": \"workspace\",\n        },\n        {\n            \"x\": 1,\n            \"y1\": 0.2,\n            \"y2\": 22,\n            \"dvc_inferred_y_value\": 22,\n            \"field\": \"y2\",\n            \"rev\": \"workspace\",\n        },\n    ]\n\n    inferred_x_from_str = split_json_result[\"data\"][\n        f\"{dvc_rel_dvclive_metrics_dir}/Error.tsv\"\n    ][0]\n    assert inferred_x_from_str[\"anchor_definitions\"][\"<DVC_METRIC_DATA>\"] == [\n        {\"step\": 0, \"Error\": \"0.11\", \"rev\": \"workspace\"},\n        {\"step\": 1, \"Error\": \"0.22\", \"rev\": \"workspace\"},\n        {\"step\": 2, \"Error\": \"0.44\", \"rev\": \"workspace\"},\n    ]\n\n    inferred_x_from_dict = split_json_result[\"data\"][\"max leaf nodes\"][0]\n    assert inferred_x_from_dict[\"anchor_definitions\"][\"<DVC_METRIC_DATA>\"] == [\n        {\"step\": 0, \"Max_Leaf_Nodes\": \"5\", \"rev\": \"workspace\"},\n        {\"step\": 1, \"Max_Leaf_Nodes\": \"50\", \"rev\": \"workspace\"},\n        {\"step\": 2, \"Max_Leaf_Nodes\": \"500\", \"rev\": \"workspace\"},\n    ]\n\n\ndef test_plots_empty_directory(tmp_dir, dvc, scm, capsys):\n    (tmp_dir / \"empty\").mkdir()\n    (tmp_dir / \"dvc.yaml\").dump({\"plots\": [{\"empty\": {}}]})\n\n    scm.add([\"dvc.yaml\"])\n    scm.commit(\"commit dvc files\")\n\n    html_path, _, split_json_result = call(capsys)\n    assert split_json_result == {}\n    assert html_path == \"\"\n"
  },
  {
    "path": "tests/integration/plots/test_repo_plots_api.py",
    "content": "import pytest\nfrom funcy import merge\n\nfrom tests.utils.plots import get_plot\n\n\n@pytest.mark.studio\ndef test_api(tmp_dir, dvc, repo_with_plots):\n    repo_state = repo_with_plots()\n    image_v1, linear_v1, confusion_v1, confusion_params = next(repo_state)\n\n    workspace_data = next(dvc.plots.collect())\n\n    assert get_plot(workspace_data, \"workspace\", file=\"image.png\", endkey=\"props\") == {}\n    image_source = get_plot(\n        workspace_data, \"workspace\", file=\"image.png\", endkey=\"data_source\"\n    )\n    assert callable(image_source)\n    assert image_source() == {\"data\": image_v1}\n\n    assert get_plot(\n        workspace_data, \"workspace\", file=\"linear.json\", endkey=\"props\"\n    ) == {\"title\": \"linear\", \"x\": \"x\"}\n    linear_source = get_plot(\n        workspace_data, \"workspace\", file=\"linear.json\", endkey=\"data_source\"\n    )\n    assert callable(linear_source)\n    assert linear_source() == {\"data\": linear_v1}\n\n    assert (\n        get_plot(workspace_data, \"workspace\", file=\"confusion.json\", endkey=\"props\")\n        == confusion_params\n    )\n    confusion_source = get_plot(\n        workspace_data,\n        \"workspace\",\n        file=\"confusion.json\",\n        endkey=\"data_source\",\n    )\n    assert callable(confusion_source)\n    assert confusion_source() == {\"data\": confusion_v1}\n\n    image_v2, linear_v2, confusion_v2, _ = next(repo_state)\n    data_generator = dvc.plots.collect(revs=[\"workspace\", \"HEAD\"])\n\n    workspace_data = next(data_generator)\n\n    assert get_plot(workspace_data, \"workspace\", file=\"image.png\", endkey=\"props\") == {}\n    image_source = get_plot(\n        workspace_data, \"workspace\", file=\"image.png\", endkey=\"data_source\"\n    )\n    assert callable(image_source)\n    assert image_source() == {\"data\": image_v2}\n\n    assert get_plot(\n        workspace_data, \"workspace\", file=\"linear.json\", endkey=\"props\"\n    ) == 
{\"title\": \"linear\", \"x\": \"x\"}\n    linear_source = get_plot(\n        workspace_data, \"workspace\", file=\"linear.json\", endkey=\"data_source\"\n    )\n    assert callable(linear_source)\n    assert linear_source() == {\"data\": linear_v2}\n\n    assert (\n        get_plot(workspace_data, \"workspace\", file=\"confusion.json\", endkey=\"props\")\n        == confusion_params\n    )\n    confusion_source = get_plot(\n        workspace_data,\n        \"workspace\",\n        file=\"confusion.json\",\n        endkey=\"data_source\",\n    )\n    assert callable(confusion_source)\n    assert confusion_source() == {\"data\": confusion_v2}\n\n    head_data = next(data_generator)\n\n    assert get_plot(head_data, \"HEAD\", file=\"image.png\", endkey=\"props\") == {}\n    image_source = get_plot(head_data, \"HEAD\", file=\"image.png\", endkey=\"data_source\")\n    assert callable(image_source)\n    assert image_source() == {\"data\": image_v1}\n\n    assert get_plot(head_data, \"HEAD\", file=\"linear.json\", endkey=\"props\") == {\n        \"title\": \"linear\",\n        \"x\": \"x\",\n    }\n    linear_source = get_plot(\n        head_data, \"HEAD\", file=\"linear.json\", endkey=\"data_source\"\n    )\n    assert callable(linear_source)\n    assert linear_source() == {\"data\": linear_v1}\n\n    assert (\n        get_plot(head_data, \"HEAD\", file=\"confusion.json\", endkey=\"props\")\n        == confusion_params\n    )\n    confusion_source = get_plot(\n        head_data, \"HEAD\", file=\"confusion.json\", endkey=\"data_source\"\n    )\n    assert callable(confusion_source)\n    assert confusion_source() == {\"data\": confusion_v1}\n\n\n@pytest.mark.studio\ndef test_api_with_config_plots(tmp_dir, dvc, capsys, repo_with_config_plots):\n    repo_state = repo_with_config_plots()\n    plots_state = next(repo_state)\n\n    plots_data = next(dvc.plots.collect())\n\n    assert get_plot(\n        plots_data, \"workspace\", typ=\"definitions\", file=\"dvc.yaml\"\n    ) == 
merge(*plots_state[\"configs\"][\"dvc.yaml\"])\n\n    for file in plots_state[\"data\"]:\n        data_source = get_plot(plots_data, \"workspace\", file=file, endkey=\"data_source\")\n        assert callable(data_source)\n        assert data_source() == {\"data\": plots_state[\"data\"][file]}\n"
  },
  {
    "path": "tests/integration/test_studio_live_experiments.py",
    "content": "import pytest\nfrom funcy import first\n\nfrom dvc.env import (\n    DVC_EXP_GIT_REMOTE,\n    DVC_STUDIO_OFFLINE,\n    DVC_STUDIO_REPO_URL,\n    DVC_STUDIO_TOKEN,\n    DVC_STUDIO_URL,\n)\nfrom dvc.repo import Repo\nfrom dvc.testing.scripts import COPY_SCRIPT\nfrom dvc.utils.studio import get_subrepo_relpath\nfrom dvc_studio_client import env, post_live_metrics\n\n\n@pytest.mark.studio\n@pytest.mark.parametrize(\"tmp\", [True, False])\n@pytest.mark.parametrize(\"offline\", [True, False])\n@pytest.mark.parametrize(\"dvc_exp_git_remote\", [None, \"DVC_EXP_GIT_REMOTE\"])\ndef test_post_to_studio(\n    tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp, offline, dvc_exp_git_remote\n):\n    valid_response = mocker.MagicMock()\n    valid_response.status_code = 200\n    live_metrics = mocker.spy(post_live_metrics, \"post_live_metrics\")\n    mocked_post = mocker.patch(\"requests.post\", return_value=valid_response)\n\n    monkeypatch.setenv(DVC_STUDIO_REPO_URL, \"STUDIO_REPO_URL\")\n    monkeypatch.setenv(DVC_STUDIO_TOKEN, \"STUDIO_TOKEN\")\n    monkeypatch.setenv(DVC_STUDIO_URL, \"https://0.0.0.0\")\n    monkeypatch.setenv(DVC_STUDIO_OFFLINE, str(offline))\n    if dvc_exp_git_remote:\n        monkeypatch.setenv(DVC_EXP_GIT_REMOTE, dvc_exp_git_remote)\n\n    baseline_sha = scm.get_rev()\n    exp_rev = first(\n        dvc.experiments.run(exp_stage.addressing, params=[\"foo=1\"], tmp_dir=tmp)\n    )\n    name = dvc.experiments.get_exact_name([exp_rev])[exp_rev]\n\n    assert live_metrics.call_count == 2\n    start_call, done_call = live_metrics.call_args_list\n\n    if offline:\n        assert mocked_post.call_count == 0\n\n    else:\n        start_call, done_call = live_metrics.call_args_list\n        assert start_call.kwargs[\"dvc_studio_config\"][\"token\"] == \"STUDIO_TOKEN\"\n        assert start_call.kwargs[\"dvc_studio_config\"][\"repo_url\"] == \"STUDIO_REPO_URL\"\n\n        assert mocked_post.call_count == 2\n\n        start_call, done_call = 
mocked_post.call_args_list\n\n        assert start_call.kwargs[\"json\"] == {\n            \"type\": \"start\",\n            \"repo_url\": dvc_exp_git_remote or \"STUDIO_REPO_URL\",\n            \"baseline_sha\": baseline_sha,\n            \"name\": name,\n            \"params\": {\"params.yaml\": {\"foo\": 1}},\n            \"client\": \"dvc\",\n        }\n\n        assert done_call.kwargs[\"json\"] == {\n            \"type\": \"done\",\n            \"repo_url\": dvc_exp_git_remote or \"STUDIO_REPO_URL\",\n            \"baseline_sha\": baseline_sha,\n            \"name\": name,\n            \"client\": \"dvc\",\n            \"experiment_rev\": exp_rev,\n            \"metrics\": {\"metrics.yaml\": {\"data\": {\"foo\": 1}}},\n        }\n\n\n@pytest.mark.studio\n@pytest.mark.parametrize(\"tmp\", [True, False])\ndef test_post_to_studio_subdir(tmp_dir, scm, mocker, monkeypatch, tmp):\n    live_exp_subdir = \"project_a\"\n\n    tmp_dir.scm_gen(\n        {\n            live_exp_subdir: {\n                \"params.yaml\": \"foo: 1\",\n                \"metrics.yaml\": \"foo: 1\",\n                \"copy.py\": COPY_SCRIPT.encode(\"utf-8\"),\n            },\n        },\n        commit=\"git init\",\n    )\n\n    project_a_dvc = Repo.init(tmp_dir / live_exp_subdir, subdir=True)\n    with monkeypatch.context() as m:\n        m.chdir(project_a_dvc.root_dir)\n\n        exp_stage = project_a_dvc.run(\n            cmd=\"python copy.py params.yaml metrics.yaml\",\n            metrics_no_cache=[\"metrics.yaml\"],\n            params=[\"foo\"],\n            name=\"copy-file\",\n        )\n\n        scm.add(\n            [\n                \".gitignore\",\n                \"copy.py\",\n                \"dvc.lock\",\n                \"dvc.yaml\",\n                \"metrics.yaml\",\n                \"params.yaml\",\n            ]\n        )\n        scm.commit(\"dvc init project_a\")\n\n    valid_response = mocker.MagicMock()\n    valid_response.status_code = 200\n    mocked_post = 
mocker.patch(\"requests.post\", return_value=valid_response)\n\n    monkeypatch.setenv(env.STUDIO_ENDPOINT, \"https://0.0.0.0\")\n    monkeypatch.setenv(env.STUDIO_REPO_URL, \"STUDIO_REPO_URL\")\n    monkeypatch.setenv(env.STUDIO_TOKEN, \"STUDIO_TOKEN\")\n\n    baseline_sha = scm.get_rev()\n    with monkeypatch.context() as m:\n        m.chdir(project_a_dvc.root_dir)\n        exp_rev = first(\n            project_a_dvc.experiments.run(\n                exp_stage.addressing, params=[\"foo=24\"], tmp_dir=tmp\n            )\n        )\n\n    name = project_a_dvc.experiments.get_exact_name([exp_rev])[exp_rev]\n    project_a_dvc.close()\n    assert mocked_post.call_count == 2\n\n    start_call = mocked_post.call_args_list[0]\n\n    assert start_call.kwargs[\"json\"] == {\n        \"type\": \"start\",\n        \"repo_url\": \"STUDIO_REPO_URL\",\n        \"baseline_sha\": baseline_sha,\n        \"name\": name,\n        \"params\": {\"params.yaml\": {\"foo\": 24}},\n        \"subdir\": live_exp_subdir,\n        \"client\": \"dvc\",\n    }\n\n\n@pytest.mark.studio\ndef test_monorepo_relpath(tmp_dir, scm):\n    from dvc.repo.destroy import destroy\n\n    tmp_dir.gen({\"project_a\": {}, \"subdir/project_b\": {}})\n\n    non_monorepo = Repo.init(tmp_dir)\n    assert get_subrepo_relpath(non_monorepo) == \"\"\n\n    destroy(non_monorepo)\n\n    monorepo_project_a = Repo.init(tmp_dir / \"project_a\", subdir=True)\n\n    assert get_subrepo_relpath(monorepo_project_a) == \"project_a\"\n\n    monorepo_project_b = Repo.init(tmp_dir / \"subdir\" / \"project_b\", subdir=True)\n\n    assert get_subrepo_relpath(monorepo_project_b) == \"subdir/project_b\"\n\n\n@pytest.mark.studio\ndef test_virtual_monorepo_relpath(tmp_dir, scm):\n    from dvc.fs.git import GitFileSystem\n    from dvc.repo.destroy import destroy\n\n    tmp_dir.gen({\"project_a\": {}, \"subdir/project_b\": {}})\n    scm.commit(\"initial commit\")\n    gfs = GitFileSystem(scm=scm, rev=\"master\")\n\n    non_monorepo = 
Repo.init(tmp_dir)\n    non_monorepo.fs = gfs\n    non_monorepo.root_dir = \"/\"\n\n    assert get_subrepo_relpath(non_monorepo) == \"\"\n\n    destroy(non_monorepo)\n\n    monorepo_project_a = Repo.init(tmp_dir / \"project_a\", subdir=True)\n    monorepo_project_a.fs = gfs\n    monorepo_project_a.root_dir = \"/project_a\"\n\n    assert get_subrepo_relpath(monorepo_project_a) == \"project_a\"\n\n    monorepo_project_b = Repo.init(tmp_dir / \"subdir\" / \"project_b\", subdir=True)\n    monorepo_project_b.fs = gfs\n    monorepo_project_b.root_dir = \"/subdir/project_b\"\n\n    assert get_subrepo_relpath(monorepo_project_b) == \"subdir/project_b\"\n"
  },
  {
    "path": "tests/remotes/__init__.py",
    "content": "from dvc_ssh.tests.fixtures import make_ssh, ssh, ssh_server  # noqa: F401\n\nfrom .git_server import git_server, git_ssh  # noqa: F401\n\nTEST_REMOTE = \"upstream\"\nTEST_CONFIG = {\n    \"cache\": {},\n    \"core\": {\"remote\": TEST_REMOTE},\n    \"remote\": {TEST_REMOTE: {\"url\": \"\"}},\n}\n"
  },
  {
    "path": "tests/remotes/git-init/git.sh",
    "content": "#!/bin/bash\napk add --no-cache git\n"
  },
  {
    "path": "tests/remotes/git_server.py",
    "content": "import pytest\n\nfrom dvc_ssh.tests.cloud import SSH, TEST_SSH_KEY_PATH, TEST_SSH_USER\n\n\nclass GitSSH(SSH):\n    @staticmethod\n    def get_url(host, port):\n        return f\"ssh://{host}:{port}/tmp/data/git\"\n\n\n@pytest.fixture\ndef git_server(request, test_config):\n    import asyncssh\n    from sshfs import SSHFileSystem\n\n    test_config.requires(\"ssh\")\n    docker_services = request.getfixturevalue(\"docker_services\")\n    conn_info = {\n        \"host\": \"127.0.0.1\",\n        \"port\": docker_services.port_for(\"git-server\", 2222),\n    }\n\n    def get_fs():\n        return SSHFileSystem(\n            **conn_info,\n            username=TEST_SSH_USER,\n            client_keys=[TEST_SSH_KEY_PATH],\n        )\n\n    def _check():\n        try:\n            fs = get_fs()\n            fs.exists(\"/\")\n            fs.execute(\"git --version\")\n        except asyncssh.Error:\n            return False\n        else:\n            return True\n\n    docker_services.wait_until_responsive(timeout=30.0, pause=1, check=_check)\n    return conn_info\n\n\n@pytest.fixture\ndef git_ssh_connection(git_server):\n    from sshfs import SSHFileSystem\n\n    return SSHFileSystem(\n        host=git_server[\"host\"],\n        port=git_server[\"port\"],\n        username=TEST_SSH_USER,\n        client_keys=[TEST_SSH_KEY_PATH],\n    )\n\n\n@pytest.fixture\ndef git_ssh(git_server, monkeypatch):\n    url = GitSSH(GitSSH.get_url(**git_server))\n    url.mkdir(exist_ok=True, parents=True)\n    return url\n"
  },
  {
    "path": "tests/remotes/user.key",
    "content": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpgIBAAKCAQEA/HlMR0chgFLx6A/BmYi9Ypj9nr0kZ3Wo6n3ZdSXORJaH6e7d\nLSw+0eoD1NDCxDORZlwYNAg45zndU5sPN4IvVcAvC/FD0qSK1H0ku1p4QV82y4a1\nSOnzRlDUIVhdQxnlQa8gI+zDO8AyzOJ4oZ9LL7Hy+mIDqGxRwHeKfDYXuHkE+aM9\nCCkidPqR/Uqxag03+y51MEphC07mr7mrDb4lOmeqy8Xq0ZcUjIkmKiGCVVRJO6g4\nkYFwr0UwpKCwLzOfw4Fy2SpzWfe3IdVUqIro97d1AGC+9OUQETiVpBYObBI1qfQo\nef6MT+CFcTV40cyKdCG6x57f6gpPMYqYfdHTyQIDAQABAoIBAQDkzgTL3/HDeugS\nWB0qyFphvaazMlSIkn/3qv/lA9MQI5+e6MN3Cc8Qq9S3DE5GQzm1GycwGHeBTdZ/\ny1maA5hkTRwV5ZuCjW3nrlYYmJ+9Fs3w2u712leHVP86DPvQMOqsgUpOZGZ2gvNG\n7MNILbWUzt8V/Le17hyUoYFWmisbGH0UyiRJsUHoAdZFdRq+sDVsFmu49fhcEsLI\n5B0fB2v5FSwgSuIfYNjPNEJG9xK+qZzjEXd5g7XVs5mbrMflbyOwDDf6/nMILGnO\nZvtCagqaH0SaxnEzucsdFdnGBhzGEbPXxDVxuS7kcdWpEN+f0JksYaVaLetCsD3s\nxudTxI3RAoGBAP85vkIaG2WEurONLURLAiM3dNxP4KXonADqn8RnJ5TNpn4Il9g5\nwiuANjja7Cvf6GrvDRl7lrphAQ/c/6iRl6tPv8Hvertd7RXmG9QoLl/qG3ghjNDl\nWUexvloZ7NXqKq9PSj7vq76cxIfSklAEQ+x1ldalVaS9iR+ZqKryY8plAoGBAP09\natBWJPapFoci7Mrml4N8TNY+n7unD/yyoLabmm0YfPbBy9x9SyCbefVwB5F1dWeV\nMZc3oG6EMKg/BH/vnKpiNtXk4OubIbqJD3jHrqeT6wAkSItgVnlU6t6FLuKjLXoD\nBuw3oM/+i2pDy8oSsxueBv1GyMZG0ixcShkeXPuVAoGBAMFFHMo5st1hcXBeTBUX\nJ/s7F4duBZQdXWVkRrAX3WVVheqS30miE2OVp3nObmGbIQk5FRZi/HUO2BsHI6Km\n/c+AiJl3m90e91ZJ9nDmLJf9U+fYoCXgR4d/FcJtN2eV99ThmjumisvBMyIXVyy4\nzibVtC3i7cPes2P2nD83ZlHxAoGBAN+VSCckx4HXjAJH/ZSuvnriVdyaceD2ARF0\njJxtCYzkoAAk3l6PaLMjUiw2exgcAkov2RbPkB/DKkqBSPHDliiAijWS3FpoHwFY\nXYaflj5yRHtdjYcwyWhaZvuLzvdeZppg7c3E14CMFn792IFSvTvW7AjWZBFbGdj8\nqpc+zY15AoGBANvsGFHmurP+VUu1ibegREBySts0WGNyq5VOTCrkN9S7AKR1pZNI\nI6W7KrRcXEvbBM7B47ykbE40hAachxN1Rpk+9qEom6etTaw/yMewgFNjZXYJw06z\n4bq6ofjKK8VqCWx41pWcmXj7Fa2A43RvZWg8TlX7Q8uc4wTBpTkuzfZC\n-----END RSA PRIVATE KEY-----\n"
  },
  {
    "path": "tests/remotes/user.key.pub",
    "content": "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQD8eUxHRyGAUvHoD8GZiL1imP2evSRndajqfdl1Jc5Elofp7t0tLD7R6gPU0MLEM5FmXBg0CDjnOd1Tmw83gi9VwC8L8UPSpIrUfSS7WnhBXzbLhrVI6fNGUNQhWF1DGeVBryAj7MM7wDLM4nihn0svsfL6YgOobFHAd4p8Nhe4eQT5oz0IKSJ0+pH9SrFqDTf7LnUwSmELTuavuasNviU6Z6rLxerRlxSMiSYqIYJVVEk7qDiRgXCvRTCkoLAvM5/DgXLZKnNZ97ch1VSoiuj3t3UAYL705RAROJWkFg5sEjWp9Ch5/oxP4IVxNXjRzIp0IbrHnt/qCk8xiph90dPJ user@unicorn\n"
  },
  {
    "path": "tests/remotes_env.sample",
    "content": "# Uncomment and fill in to test against S3\n# export AWS_ACCESS_KEY_ID=\"...SET-ME...\"\n# export AWS_SECRET_ACCESS_KEY=\"...SET-ME...\"\n# export DVC_TEST_AWS_REPO_BUCKET=\"...SET-ME...\"\n\n# Uncomment and set the credentials file path to test against Google Storage\n# export GOOGLE_APPLICATION_CREDENTIALS=\"scripts/ci/gcp-creds.json\"\n\n# Uncomment to test against Microsoft Azure via Azurite\n# export AZURE_STORAGE_CONTAINER_NAME=\"dvc-test\"\n# export AZURE_STORAGE_CONNECTION_STRING=\"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;\"\n\n# Uncomment and fill in to test against Alibaba OSS\n# export OSS_ENDPOINT=\"...SET-ME...\"\n# export OSS_ACCESS_KEY_ID=\"...SET-ME...\"\n# export OSS_ACCESS_KEY_SECRET=\"...SET-ME...\"\n# export DVC_TEST_OSS=1\n# export DVC_TEST_OSS_REPO_BUCKET=\"...SET-ME...\"\n"
  },
  {
    "path": "tests/scripts.py",
    "content": "import pytest\n\nfrom dvc.testing.scripts import _add_script\n\n\n@pytest.fixture\ndef append_foo_script(tmp_dir):\n    return _add_script(\n        tmp_dir,\n        \"append_foo.py\",\n        \"\"\"\nimport sys\nfrom pathlib import Path\n\nwith Path(sys.argv[1]).open(\"a+\", encoding=\"utf-8\") as f:\n    f.write(\"foo\")\n\"\"\".replace(\"\\r\\n\", \"\\n\"),\n    )\n\n\n@pytest.fixture\ndef head_script(tmp_dir):\n    \"\"\"Output first line of each file to different file with '-1' appended.\n    Useful for tracking multiple outputs/dependencies which are not a copy\n    of each others.\n    \"\"\"\n    return _add_script(\n        tmp_dir,\n        \"head.py\",\n        \"\"\"\nimport sys\nfor file in sys.argv[1:]:\n    with open(file) as f, open(file +\"-1\",\"w+\") as w:\n        w.write(f.readline())\n\"\"\".replace(\"\\r\\n\", \"\\n\"),\n    )\n"
  },
  {
    "path": "tests/unit/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/cli/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/cli/test_main.py",
    "content": "from argparse import Namespace\n\nimport pytest\nfrom funcy import raiser\n\nfrom dvc.cli import main\nfrom dvc_data.hashfile.build import IgnoreInCollectedDirError\nfrom dvc_data.hashfile.cache import DiskError\nfrom dvc_objects.fs.base import FileSystem, RemoteMissingDepsError\n\n\ndef test_state_pickle_errors_are_correctly_raised(tmp_dir, caplog, mocker):\n    path = tmp_dir / \"dir\" / \"test\"\n    mocker.patch(\n        \"dvc.cli.parse_args\",\n        return_value=Namespace(\n            func=raiser(DiskError(path, \"md5s\")),\n            quiet=False,\n            verbose=True,\n        ),\n    )\n\n    assert main() == 255\n    assert (\n        \"Could not open pickled 'md5s' cache.\\n\"\n        f\"Remove the '{path.relative_to(tmp_dir)}' directory \"\n        \"and then retry this command.\\n\"\n        \"See <https://error.dvc.org/pickle> for more information.\" in caplog.text\n    )\n\n\n@pytest.mark.parametrize(\n    \"pkg, msg\",\n    [\n        (None, \"Please report this bug to\"),\n        (\"pip\", \"pip install 'dvc[proto]'\"),\n        (\"conda\", \"conda install -c conda-forge dvc-proto\"),\n    ],\n)\ndef test_remote_missing_deps_are_correctly_reported(tmp_dir, caplog, mocker, pkg, msg):\n    error = RemoteMissingDepsError(FileSystem(), \"proto\", \"proto://\", [\"deps\"])\n    mocker.patch(\"dvc.PKG\", pkg)\n    mocker.patch(\n        \"dvc.cli.parse_args\",\n        return_value=Namespace(func=raiser(error), quiet=False, verbose=True),\n    )\n\n    assert main() == 255\n    expected = (\n        \"URL 'proto://' is supported but requires these missing dependencies: \"\n        \"['deps']. 
\"\n    )\n    if pkg:\n        expected += (\n            \"To install dvc with those dependencies, run:\\n\\n\"\n            f\"\\t{msg}\\n\\n\"\n            \"See <https://dvc.org/doc/install> for more info.\"\n        )\n    else:\n        expected += (\n            \"\\nPlease report this bug to \"\n            \"<https://github.com/treeverse/dvc/issues>. \"\n            \"Thank you!\"\n        )\n    assert expected in caplog.text\n\n\ndef test_ignore_in_collected_dir_error_is_logged(tmp_dir, caplog, mocker):\n    error = IgnoreInCollectedDirError(\".dvcignore\", \"dir\")\n    mocker.patch(\n        \"dvc.cli.parse_args\",\n        return_value=Namespace(func=raiser(error), quiet=False, verbose=True),\n    )\n    assert main() == 255\n    expected = \".dvcignore file should not be in collected dir path: 'dir'\"\n    assert expected in caplog.text\n"
  },
  {
    "path": "tests/unit/command/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/command/ls/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/command/ls/test_ls.py",
    "content": "import json\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.ls import CmdList, show_tree\n\n\ndef _test_cli(mocker, *args):\n    cli_args = parse_args([\"list\", *args])\n    assert cli_args.func == CmdList\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.ls\")\n\n    assert cmd.run() == 0\n    return m\n\n\ndef test_list(mocker):\n    url = \"local_dir\"\n    m = _test_cli(mocker, url)\n    m.assert_called_once_with(\n        url,\n        None,\n        recursive=False,\n        rev=None,\n        dvc_only=False,\n        config=None,\n        remote=None,\n        remote_config=None,\n        maxdepth=None,\n    )\n\n\ndef test_list_recursive(mocker):\n    url = \"local_dir\"\n    m = _test_cli(mocker, url, \"-R\")\n    m.assert_called_once_with(\n        url,\n        None,\n        recursive=True,\n        rev=None,\n        dvc_only=False,\n        config=None,\n        remote=None,\n        remote_config=None,\n        maxdepth=None,\n    )\n\n\ndef test_list_git_ssh_rev(mocker):\n    url = \"git@github.com:repo\"\n    m = _test_cli(mocker, url, \"--rev\", \"123\")\n    m.assert_called_once_with(\n        url,\n        None,\n        recursive=False,\n        rev=\"123\",\n        dvc_only=False,\n        config=None,\n        remote=None,\n        remote_config=None,\n        maxdepth=None,\n    )\n\n\ndef test_list_targets(mocker):\n    url = \"local_dir\"\n    target = \"subdir\"\n    m = _test_cli(mocker, url, target)\n    m.assert_called_once_with(\n        url,\n        target,\n        recursive=False,\n        rev=None,\n        dvc_only=False,\n        config=None,\n        remote=None,\n        remote_config=None,\n        maxdepth=None,\n    )\n\n\ndef test_list_outputs_only(mocker):\n    url = \"local_dir\"\n    m = _test_cli(mocker, url, None, \"--dvc-only\")\n    m.assert_called_once_with(\n        url,\n        None,\n        recursive=False,\n        rev=None,\n        dvc_only=True,\n        
config=None,\n        remote=None,\n        remote_config=None,\n        maxdepth=None,\n    )\n\n\ndef test_list_config(mocker):\n    url = \"local_dir\"\n    m = _test_cli(\n        mocker,\n        url,\n        None,\n        \"--config\",\n        \"myconfig\",\n        \"--remote\",\n        \"myremote\",\n        \"--remote-config\",\n        \"k1=v1\",\n        \"k2=v2\",\n    )\n    m.assert_called_once_with(\n        url,\n        None,\n        recursive=False,\n        rev=None,\n        dvc_only=False,\n        config=\"myconfig\",\n        remote=\"myremote\",\n        remote_config={\"k1\": \"v1\", \"k2\": \"v2\"},\n        maxdepth=None,\n    )\n\n\ndef test_list_level(mocker):\n    url = \"local_dir\"\n    m = _test_cli(mocker, url, None, \"--level\", \"1\")\n    m.assert_called_once_with(\n        url,\n        None,\n        recursive=False,\n        rev=None,\n        dvc_only=False,\n        config=None,\n        remote=None,\n        remote_config=None,\n        maxdepth=1,\n    )\n\n\ndef test_list_tree(mocker):\n    url = \"git@github.com:repo\"\n    cli_args = parse_args(\n        [\n            \"list\",\n            url,\n            \"local_dir\",\n            \"--rev\",\n            \"123\",\n            \"--tree\",\n            \"--show-hash\",\n            \"--size\",\n            \"--level\",\n            \"2\",\n            \"--dvc-only\",\n            \"--config\",\n            \"myconfig\",\n            \"--remote\",\n            \"myremote\",\n            \"--remote-config\",\n            \"k1=v1\",\n            \"k2=v2\",\n        ]\n    )\n    assert cli_args.func == CmdList\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.ls.ls_tree\")\n\n    assert cmd.run() == 0\n    m.assert_called_once_with(\n        url,\n        \"local_dir\",\n        rev=\"123\",\n        dvc_only=True,\n        config=\"myconfig\",\n        remote=\"myremote\",\n        remote_config={\"k1\": \"v1\", \"k2\": \"v2\"},\n        
maxdepth=2,\n    )\n\n\ndef test_show_json(mocker, capsys):\n    cli_args = parse_args([\"list\", \"local_dir\", \"--json\"])\n    assert cli_args.func == CmdList\n\n    cmd = cli_args.func(cli_args)\n\n    result = [{\"key\": \"val\"}]\n    mocker.patch(\"dvc.repo.Repo.ls\", return_value=result)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert json.dumps(result) in out\n\n\ndef test_show_colors(mocker, capsys, monkeypatch):\n    cli_args = parse_args([\"list\", \"local_dir\"])\n    assert cli_args.func == CmdList\n    cmd = cli_args.func(cli_args)\n\n    monkeypatch.setenv(\"LS_COLORS\", \"ex=01;32:rs=0:di=01;34:*.xml=01;31:*.dvc=01;33:\")\n    result = [\n        {\"isdir\": False, \"isexec\": 0, \"isout\": False, \"path\": \".dvcignore\"},\n        {\"isdir\": False, \"isexec\": 0, \"isout\": False, \"path\": \".gitignore\"},\n        {\"isdir\": False, \"isexec\": 0, \"isout\": False, \"path\": \"README.md\"},\n        {\"isdir\": True, \"isexec\": 0, \"isout\": True, \"path\": \"data\"},\n        {\"isdir\": False, \"isexec\": 0, \"isout\": True, \"path\": \"structure.xml\"},\n        {\"isdir\": False, \"isexec\": 0, \"isout\": False, \"path\": \"structure.xml.dvc\"},\n        {\"isdir\": True, \"isexec\": 0, \"isout\": False, \"path\": \"src\"},\n        {\"isdir\": False, \"isexec\": 1, \"isout\": False, \"path\": \"run.sh\"},\n    ]\n    mocker.patch(\"dvc.repo.Repo.ls\", return_value=result)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    entries = out.splitlines()\n\n    assert entries == [\n        \".dvcignore\",\n        \".gitignore\",\n        \"README.md\",\n        \"\\x1b[01;34mdata\\x1b[0m\",\n        \"\\x1b[01;31mstructure.xml\\x1b[0m\",\n        \"\\x1b[01;33mstructure.xml.dvc\\x1b[0m\",\n        \"\\x1b[01;34msrc\\x1b[0m\",\n        \"\\x1b[01;32mrun.sh\\x1b[0m\",\n    ]\n\n\ndef test_show_size(mocker, capsys):\n    cli_args = parse_args([\"list\", \"local_dir\", \"--size\"])\n    assert 
cli_args.func == CmdList\n    cmd = cli_args.func(cli_args)\n\n    result = [\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \".dvcignore\",\n            \"size\": 100,\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \".gitignore\",\n            \"size\": 200,\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \"README.md\",\n            \"size\": 100_000,\n        },\n    ]\n    mocker.patch(\"dvc.repo.Repo.ls\", return_value=result)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    entries = out.splitlines()\n\n    assert entries == [\"  100  .dvcignore\", \"  200  .gitignore\", \"97.7k  README.md\"]\n\n\ndef test_show_hash(mocker, capsys):\n    cli_args = parse_args([\"list\", \"local_dir\", \"--show-hash\"])\n    assert cli_args.func == CmdList\n    cmd = cli_args.func(cli_args)\n\n    result = [\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \".dvcignore\",\n            \"md5\": \"123\",\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \".gitignore\",\n            \"md5\": \"456\",\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \"README.md\",\n            \"md5\": \"789\",\n        },\n    ]\n    mocker.patch(\"dvc.repo.Repo.ls\", return_value=result)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    entries = out.splitlines()\n\n    assert entries == [\"123  .dvcignore\", \"456  .gitignore\", \"789  README.md\"]\n\n\ndef test_show_size_and_hash(mocker, capsys):\n    cli_args = parse_args([\"list\", \"local_dir\", \"--size\", 
\"--show-hash\"])\n    assert cli_args.func == CmdList\n    cmd = cli_args.func(cli_args)\n\n    result = [\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \".dvcignore\",\n            \"size\": 100,\n            \"md5\": \"123\",\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \".gitignore\",\n            \"size\": 200,\n            \"md5\": \"456\",\n        },\n        {\n            \"isdir\": False,\n            \"isexec\": 0,\n            \"isout\": False,\n            \"path\": \"README.md\",\n            \"size\": 100_000,\n            \"md5\": \"789\",\n        },\n    ]\n    mocker.patch(\"dvc.repo.Repo.ls\", return_value=result)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    entries = out.splitlines()\n\n    assert entries == [\n        \"  100  123  .dvcignore\",\n        \"  200  456  .gitignore\",\n        \"97.7k  789  README.md\",\n    ]\n\n\ndef test_show_tree(capsys):\n    entries = {\n        \"data\": {\n            \"isout\": True,\n            \"isdir\": True,\n            \"isexec\": False,\n            \"size\": 192,\n            \"path\": \"data\",\n            \"md5\": \"3fb071066d5d5b282f56a0169340346d.dir\",\n            \"contents\": {\n                \"dir\": {\n                    \"isout\": False,\n                    \"isdir\": True,\n                    \"isexec\": False,\n                    \"size\": 96,\n                    \"path\": \"data/dir\",\n                    \"md5\": None,\n                    \"contents\": {\n                        \"subdir\": {\n                            \"isout\": True,\n                            \"isdir\": True,\n                            \"isexec\": False,\n                            \"size\": 96,\n                            \"md5\": None,\n                            \"path\": \"data/dir/subdir\",\n       
                     \"contents\": {\n                                \"foobar\": {\n                                    \"isout\": True,\n                                    \"isdir\": False,\n                                    \"isexec\": False,\n                                    \"size\": 4,\n                                    \"md5\": \"d3b07384d113edec49eaa6238ad5ff00\",\n                                }\n                            },\n                        },\n                        \"foo\": {\n                            \"isout\": False,\n                            \"isdir\": False,\n                            \"isexec\": False,\n                            \"size\": 4,\n                            \"path\": \"data/dir/foo\",\n                            \"md5\": None,\n                        },\n                    },\n                },\n                \"bar\": {\n                    \"isout\": True,\n                    \"isdir\": False,\n                    \"isexec\": False,\n                    \"size\": 4,\n                    \"path\": \"data/bar\",\n                    \"md5\": \"c157a79031e1c40f85931829bc5fc552\",\n                },\n                \"large-file\": {\n                    \"isout\": False,\n                    \"isdir\": False,\n                    \"isexec\": False,\n                    \"size\": 1073741824,\n                    \"path\": \"data/large-file\",\n                    \"md5\": None,\n                },\n                \"dir2\": {\n                    \"isout\": True,\n                    \"isdir\": True,\n                    \"isexec\": False,\n                    \"size\": 96,\n                    \"md5\": None,\n                    \"path\": \"data/dir2\",\n                    \"contents\": {\n                        \"foo\": {\n                            \"isout\": True,\n                            \"isdir\": False,\n                            \"isexec\": False,\n                            
\"size\": 4,\n                            \"path\": \"data/dir2/foo\",\n                            \"md5\": \"d3b07384d113edec49eaa6238ad5ff00\",\n                        }\n                    },\n                },\n            },\n        }\n    }\n\n    show_tree(entries, with_color=False)\n    out, _ = capsys.readouterr()\n    expected = \"\"\"\\\ndata\n├── dir\n│   ├── subdir\n│   │   └── foobar\n│   └── foo\n├── bar\n├── large-file\n└── dir2\n    └── foo\n\"\"\"\n    assert out == expected\n\n    show_tree(entries, with_color=False, with_size=True)\n    out, _ = capsys.readouterr()\n    expected = \"\"\"\\\n  192  data\n   96  ├── dir\n   96  │   ├── subdir\n    4  │   │   └── foobar\n    4  │   └── foo\n    4  ├── bar\n1.00G  ├── large-file\n   96  └── dir2\n    4      └── foo\n\"\"\"\n    assert out == expected\n\n    show_tree(entries, with_color=False, with_hash=True)\n    out, _ = capsys.readouterr()\n    expected = \"\"\"\\\n3fb071066d5d5b282f56a0169340346d.dir  data\n-                                     ├── dir\n-                                     │   ├── subdir\nd3b07384d113edec49eaa6238ad5ff00      │   │   └── foobar\n-                                     │   └── foo\nc157a79031e1c40f85931829bc5fc552      ├── bar\n-                                     ├── large-file\n-                                     └── dir2\nd3b07384d113edec49eaa6238ad5ff00          └── foo\n\"\"\"\n    assert out == expected\n\n    show_tree(entries, with_color=False, with_hash=True, with_size=True)\n    out, _ = capsys.readouterr()\n    expected = \"\"\"\\\n  192  3fb071066d5d5b282f56a0169340346d.dir  data\n   96  -                                     ├── dir\n   96  -                                     │   ├── subdir\n    4  d3b07384d113edec49eaa6238ad5ff00      │   │   └── foobar\n    4  -                                     │   └── foo\n    4  c157a79031e1c40f85931829bc5fc552      ├── bar\n1.00G  -                                     ├── large-file\n   96  -          
                           └── dir2\n    4  d3b07384d113edec49eaa6238ad5ff00          └── foo\n\"\"\"\n    assert out == expected\n\n\ndef test_list_alias():\n    cli_args = parse_args([\"ls\", \"local_dir\"])\n    assert cli_args.func == CmdList\n"
  },
  {
    "path": "tests/unit/command/ls/test_ls_colors.py",
    "content": "from dvc.commands.ls.ls_colors import LsColors\n\n\ndef colorize(ls_colors):\n    def _colorize(f, spec=\"\"):\n        fs_path = {\n            \"path\": f,\n            \"isexec\": \"e\" in spec,\n            \"isdir\": \"d\" in spec,\n            \"isout\": \"o\" in spec,\n        }\n        return ls_colors.format(fs_path)\n\n    return _colorize\n\n\ndef test_ls_colors_out_file():\n    ls_colors = LsColors(LsColors.default)\n    assert colorize(ls_colors)(\"file\", \"o\") == \"file\"\n\n\ndef test_ls_colors_out_dir():\n    ls_colors = LsColors(LsColors.default)\n    assert colorize(ls_colors)(\"dir\", \"do\") == \"\\x1b[01;34mdir\\x1b[0m\"\n\n\ndef test_ls_colors_out_exec():\n    ls_colors = LsColors(LsColors.default)\n    assert colorize(ls_colors)(\"script.sh\", \"eo\") == \"\\x1b[01;32mscript.sh\\x1b[0m\"\n\n\ndef test_ls_colors_out_ext():\n    ls_colors = LsColors(LsColors.default + \":*.xml=01;33\")\n    assert colorize(ls_colors)(\"file.xml\", \"o\") == \"\\x1b[01;33mfile.xml\\x1b[0m\"\n\n\ndef test_ls_colors_file():\n    ls_colors = LsColors(LsColors.default)\n    assert colorize(ls_colors)(\"file\") == \"file\"\n\n\ndef test_ls_colors_dir():\n    ls_colors = LsColors(LsColors.default)\n    assert colorize(ls_colors)(\"dir\", \"d\") == \"\\x1b[01;34mdir\\x1b[0m\"\n\n\ndef test_ls_colors_exec():\n    ls_colors = LsColors(LsColors.default)\n    assert colorize(ls_colors)(\"script.sh\", \"e\") == \"\\x1b[01;32mscript.sh\\x1b[0m\"\n\n\ndef test_ls_colors_ext():\n    ls_colors = LsColors(LsColors.default + \":*.xml=01;33\")\n    assert colorize(ls_colors)(\"file.xml\") == \"\\x1b[01;33mfile.xml\\x1b[0m\"\n\n\ndef test_ls_repo_with_custom_color_env_defined(monkeypatch):\n    monkeypatch.setenv(\"LS_COLORS\", \"rs=0:di=01;34:*.xml=01;31:*.dvc=01;33:\")\n    ls_colors = LsColors()\n    colorizer = colorize(ls_colors)\n\n    assert colorizer(\".dvcignore\") == \".dvcignore\"\n    assert colorizer(\".gitignore\") == \".gitignore\"\n    assert 
colorizer(\"README.md\") == \"README.md\"\n    assert colorizer(\"data\", \"d\") == \"\\x1b[01;34mdata\\x1b[0m\"\n    assert colorizer(\"structure.xml\") == \"\\x1b[01;31mstructure.xml\\x1b[0m\"\n    assert colorizer(\"structure.xml.dvc\") == \"\\x1b[01;33mstructure.xml.dvc\\x1b[0m\"\n"
  },
  {
    "path": "tests/unit/command/test_add.py",
    "content": "import logging\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.add import CmdAdd\n\n\ndef test_add(mocker, dvc):\n    cli_args = parse_args([\"add\", \"--no-commit\", \"--glob\", \"data\"])\n    assert cli_args.func == CmdAdd\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"add\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        [\"data\"],\n        no_commit=True,\n        glob=True,\n        out=None,\n        remote=None,\n        to_remote=False,\n        remote_jobs=None,\n        force=False,\n        relink=True,\n    )\n\n\ndef test_add_to_remote(mocker, dvc):\n    cli_args = parse_args(\n        [\"add\", \"s3://bucket/foo\", \"--to-remote\", \"--out\", \"bar\", \"--remote\", \"remote\"]\n    )\n    assert cli_args.func == CmdAdd\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"add\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        [\"s3://bucket/foo\"],\n        no_commit=False,\n        glob=False,\n        out=\"bar\",\n        remote=\"remote\",\n        to_remote=True,\n        remote_jobs=None,\n        force=False,\n        relink=True,\n    )\n\n\ndef test_add_to_remote_invalid_combinations(mocker, caplog, dvc):\n    cli_args = parse_args([\"add\", \"s3://bucket/foo\", \"s3://bucket/bar\", \"--to-remote\"])\n    assert cli_args.func == CmdAdd\n\n    cmd = cli_args.func(cli_args)\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert cmd.run() == 1\n        expected_msg = \"multiple targets can't be used with --to-remote\"\n        assert expected_msg in caplog.text\n\n    for option, value in ((\"--remote\", \"remote\"), (\"--remote-jobs\", \"4\")):\n        cli_args = parse_args([\"add\", \"foo\", option, value])\n\n        cmd = cli_args.func(cli_args)\n        with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n            assert cmd.run() == 1\n            expected_msg = 
f\"{option} can't be used without --to-remote\"\n            assert expected_msg in caplog.text\n\n\ndef test_add_to_cache_invalid_combinations(mocker, caplog, dvc):\n    cli_args = parse_args([\"add\", \"s3://bucket/foo\", \"s3://bucket/bar\", \"-o\", \"foo\"])\n    assert cli_args.func == CmdAdd\n\n    cmd = cli_args.func(cli_args)\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert cmd.run() == 1\n        expected_msg = \"multiple targets can't be used with --out\"\n        assert expected_msg in caplog.text\n"
  },
  {
    "path": "tests/unit/command/test_cache.py",
    "content": "import os\nimport textwrap\n\nfrom dvc.cli import main\n\n\ndef test_cache_dir_local(tmp_dir, dvc, capsys, caplog):\n    (tmp_dir / \".dvc\" / \"config.local\").write_text(\n        textwrap.dedent(\n            \"\"\"\\\n            [cache]\n                dir = some/path\n            \"\"\"\n        )\n    )\n    path = os.path.join(dvc.dvc_dir, \"some\", \"path\")\n\n    assert main([\"cache\", \"dir\", \"--local\"]) == 0\n\n    out, _ = capsys.readouterr()\n    assert path in out\n\n    assert main([\"cache\", \"dir\"]) == 0\n    out, _ = capsys.readouterr()\n    assert path in out\n\n    assert main([\"cache\", \"dir\", \"--project\"]) == 251\n    assert \"option 'dir' doesn't exist in section 'cache'\" in caplog.text\n"
  },
  {
    "path": "tests/unit/command/test_checkout.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.checkout import CmdCheckout, log_changes\n\n\ndef test_checkout(tmp_dir, dvc, mocker):\n    cli_args = parse_args([\"checkout\", \"foo.dvc\", \"bar.dvc\", \"--relink\", \"--with-deps\"])\n    assert cli_args.func == CmdCheckout\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.checkout\")\n\n    assert cmd.run() == 0\n    m.assert_called_once_with(\n        targets=[\"foo.dvc\", \"bar.dvc\"],\n        force=False,\n        recursive=False,\n        relink=True,\n        with_deps=True,\n        allow_missing=False,\n    )\n\n\ndef test_log_changes(capsys):\n    stats = {\n        \"added\": [\"file1\", \"dir1/\"],\n        \"deleted\": [\"dir2/\"],\n        \"modified\": [\"file2\"],\n    }\n\n    from itertools import zip_longest\n\n    def _assert_output(stats, expected_outs):\n        log_changes(stats)\n        out, _ = capsys.readouterr()\n        actual_output = out.splitlines()\n        for out, line in zip_longest(expected_outs, actual_output):\n            assert out.expandtabs() in line\n\n    _assert_output(stats, [\"M\\tfile2\", \"A\\tfile1\", \"A\\tdir1/\", \"D\\tdir2/\"])\n\n    del stats[\"deleted\"][0]\n    _assert_output(stats, [\"M\\tfile2\", \"A\\tfile1\", \"A\\tdir1/\"])\n\n    del stats[\"modified\"]\n    _assert_output(stats, [\"A\\tfile1\", \"A\\tdir1/\"])\n"
  },
  {
    "path": "tests/unit/command/test_compat_flag.py",
    "content": "from itertools import takewhile\n\nimport pytest\n\nfrom dvc.cli import parse_args\n\n\ndef _id_gen(val) -> str:\n    if isinstance(val, list):\n        return \"-\".join(takewhile(lambda v: not v.startswith(\"-\"), val))\n    return str(val)\n\n\n@pytest.mark.parametrize(\n    \"args, key\",\n    [\n        ([\"exp\", \"list\", \"--names-only\"], \"name_only\"),\n        ([\"stage\", \"list\", \"--names-only\"], \"name_only\"),\n    ],\n    ids=_id_gen,\n)\ndef test_backward_compat_flags(args, key):\n    \"\"\"Test support for flags kept for backward compatibility.\"\"\"\n    cli_args = parse_args(args)\n    d = vars(cli_args)\n    assert d[key] is True\n"
  },
  {
    "path": "tests/unit/command/test_completion.py",
    "content": "import logging\n\nimport pytest\n\nfrom dvc.cli import main\nfrom dvc.commands.completion import SUPPORTED_SHELLS\n\n\n@pytest.mark.parametrize(\"shell\", SUPPORTED_SHELLS)\ndef test_completion(caplog, capsys, shell):\n    with caplog.at_level(logging.INFO):\n        assert main([\"completion\", \"-s\", shell]) == 0\n    assert not caplog.text\n\n    out, err = capsys.readouterr()\n    assert not err\n    assert out\n"
  },
  {
    "path": "tests/unit/command/test_config.py",
    "content": "import pytest\n\nfrom dvc.cli import DvcParserError, parse_args\nfrom dvc.commands.config import CmdConfig\n\n\ndef test_config_formatter():\n    example_config = {\n        \"section_foo\": {\"option_bar\": True, \"option_baz\": False},\n        \"section_foo2\": {\n            \"option_bar2\": {\"option_baz2\": True},\n            \"option_baz3\": {\"option_baz4\": False},\n        },\n        \"section_foo3\": {},\n    }\n\n    config_lines = tuple(CmdConfig._format_config(example_config))\n    assert config_lines == (\n        \"section_foo.option_bar=True\",\n        \"section_foo.option_baz=False\",\n        \"section_foo2.option_bar2.option_baz2=True\",\n        \"section_foo2.option_baz3.option_baz4=False\",\n    )\n\n\n@pytest.mark.parametrize(\"name\", [\"way.too.long\", \"no_option\", \"remote.way.too.long\"])\ndef test_config_bad_name(name):\n    with pytest.raises(DvcParserError):\n        parse_args([\"config\", name])\n"
  },
  {
    "path": "tests/unit/command/test_dag.py",
    "content": "import networkx as nx\nimport pytest\n\nfrom dvc.cli import main, parse_args\nfrom dvc.commands.dag import (\n    CmdDAG,\n    _build,\n    _collapse_foreach_matrix,\n    _show_ascii,\n    _show_dot,\n    _show_mermaid,\n)\nfrom dvc.parsing import JOIN\n\n\n@pytest.mark.parametrize(\n    \"fmt, formatter\",\n    [\n        (None, \"_show_ascii\"),\n        (\"--dot\", \"_show_dot\"),\n        (\"--mermaid\", \"_show_mermaid\"),\n        (\"--md\", \"_show_mermaid\"),\n    ],\n)\ndef test_dag(tmp_dir, dvc, mocker, fmt, formatter):\n    from dvc.commands import dag\n\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n\n    args = [\"dag\", \"--full\", \"foo.dvc\"]\n    if fmt:\n        args.append(fmt)\n    cli_args = parse_args(args)\n    assert cli_args.func == CmdDAG\n\n    fmt_func = mocker.spy(dag, formatter)\n\n    cmd = cli_args.func(cli_args)\n\n    mocker.patch(\"dvc.commands.dag._build\", return_value=dvc.index.graph)\n\n    assert cmd.run() == 0\n\n    assert fmt_func.called\n\n\n@pytest.fixture\ndef repo(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"a\", \"a\")\n    tmp_dir.dvc_gen(\"b\", \"b\")\n\n    dvc.run(no_exec=True, deps=[\"a\", \"c\"], outs=[\"d\", \"e\"], cmd=\"cmd1\", name=\"1\")\n    dvc.run(no_exec=True, deps=[\"b\", \"c\"], outs=[\"f\", \"g\"], cmd=\"cmd2\", name=\"2\")\n    dvc.run(no_exec=True, deps=[\"a\", \"b\", \"c\"], outs=[\"h\", \"i\"], cmd=\"cmd3\", name=\"3\")\n    dvc.run(no_exec=True, deps=[\"a\", \"h\"], outs=[\"j\"], cmd=\"cmd4\", name=\"4\")\n\n    return dvc\n\n\ndef test_collapse_foreach_matrix(repo):\n    graph = nx.DiGraph(\n        [\n            (\"2\", \"1\"),\n            (\"3@a\", \"2\"),\n            (\"3@a\", \"1\"),\n            (\"4\", \"1\"),\n            (\"3@b\", \"4\"),\n            (\"3@b\", \"1\"),\n            (\"5\", \"3@a\"),\n            (\"6\", \"3@b\"),\n            (\"7\", \"5\"),\n            (\"7\", \"6\"),\n        ]\n    )\n    expected_graph = nx.DiGraph(\n        [\n            (\"2\", \"1\"),\n    
        (\"3\", \"2\"),\n            (\"4\", \"1\"),\n            (\"3\", \"4\"),\n            (\"3\", \"1\"),\n            (\"5\", \"3\"),\n            (\"6\", \"3\"),\n            (\"7\", \"5\"),\n            (\"7\", \"6\"),\n        ]\n    )\n    collapsed_graph = _collapse_foreach_matrix(graph)\n    for node in collapsed_graph.nodes:\n        assert JOIN not in node\n    for n1, n2 in collapsed_graph.edges:\n        assert JOIN not in n1\n        assert JOIN not in n2\n    assert nx.is_isomorphic(collapsed_graph, expected_graph)\n\n\ndef test_build(repo):\n    assert nx.is_isomorphic(_build(repo), repo.index.graph)\n\n\ndef test_build_collapse(repo):\n    assert nx.is_isomorphic(\n        _build(repo, collapse_foreach_matrix=True), repo.index.graph\n    )\n\n\ndef test_build_target(repo):\n    graph = _build(repo, target=\"3\")\n    assert set(graph.nodes()) == {\"3\", \"b.dvc\", \"a.dvc\"}\n    assert set(graph.edges()) == {(\"3\", \"a.dvc\"), (\"3\", \"b.dvc\")}\n\n\ndef test_build_target_with_outs(repo):\n    graph = _build(repo, target=\"3\", outs=True)\n    assert set(graph.nodes()) == {\"a\", \"b\", \"h\", \"i\"}\n    assert set(graph.edges()) == {(\"i\", \"a\"), (\"i\", \"b\"), (\"h\", \"a\"), (\"h\", \"b\")}\n\n\ndef test_build_granular_target_with_outs(repo):\n    graph = _build(repo, target=\"h\", outs=True)\n    assert set(graph.nodes()) == {\"a\", \"b\", \"h\"}\n    assert set(graph.edges()) == {(\"h\", \"a\"), (\"h\", \"b\")}\n\n\ndef test_build_full(repo):\n    graph = _build(repo, target=\"3\", full=True)\n    assert nx.is_isomorphic(graph, repo.index.graph)\n\n\n# NOTE: granular or not, full outs DAG should be the same\n@pytest.mark.parametrize(\"granular\", [True, False])\ndef test_build_full_outs(repo, granular):\n    target = \"h\" if granular else \"3\"\n    graph = _build(repo, target=target, outs=True, full=True)\n    assert set(graph.nodes()) == {\"j\", \"i\", \"d\", \"b\", \"g\", \"f\", \"e\", \"a\", \"h\"}\n    assert set(graph.edges()) 
== {\n        (\"d\", \"a\"),\n        (\"e\", \"a\"),\n        (\"f\", \"b\"),\n        (\"g\", \"b\"),\n        (\"h\", \"a\"),\n        (\"h\", \"b\"),\n        (\"i\", \"a\"),\n        (\"i\", \"b\"),\n        (\"j\", \"a\"),\n        (\"j\", \"h\"),\n    }\n\n\ndef test_show_ascii(repo):\n    assert [line.rstrip() for line in _show_ascii(repo.index.graph).splitlines()] == [\n        \"                        +----------------+                          +----------------+\",  # noqa: E501\n        \"                        | stage: 'a.dvc' |                          | stage: 'b.dvc' |\",  # noqa: E501\n        \"                       *+----------------+****                      +----------------+\",  # noqa: E501\n        \"                  *****           *           *****                  ***           ***\",  # noqa: E501\n        \"              ****                *                *****           **                 **\",  # noqa: E501\n        \"           ***                     *                    ***      **                     **\",  # noqa: E501\n        \"+------------+                     **                   +------------+              +------------+\",  # noqa: E501\n        \"| stage: '1' |                       **                 | stage: '3' |              | stage: '2' |\",  # noqa: E501\n        \"+------------+                         ***              +------------+              +------------+\",  # noqa: E501\n        \"                                          **           ***\",\n        \"                                            **       **\",\n        \"                                              **   **\",\n        \"                                          +------------+\",\n        \"                                          | stage: '4' |\",\n        \"                                          +------------+\",\n    ]\n\n\ndef test_show_dot(repo):\n    # dot file rendering is not deterministic though graph\n    # output 
doesn't depend upon order of lines. Use sorted values\n    # https://github.com/treeverse/dvc/pull/7725\n    expected = [\n        \"\\\"stage: '1'\\\";\",\n        \"\\\"stage: '2'\\\";\",\n        \"\\\"stage: '3'\\\" -> \\\"stage: '4'\\\";\",\n        \"\\\"stage: '3'\\\";\",\n        \"\\\"stage: '4'\\\";\",\n        \"\\\"stage: 'a.dvc'\\\" -> \\\"stage: '1'\\\";\",\n        \"\\\"stage: 'a.dvc'\\\" -> \\\"stage: '3'\\\";\",\n        \"\\\"stage: 'a.dvc'\\\" -> \\\"stage: '4'\\\";\",\n        \"\\\"stage: 'a.dvc'\\\";\",\n        \"\\\"stage: 'b.dvc'\\\" -> \\\"stage: '2'\\\";\",\n        \"\\\"stage: 'b.dvc'\\\" -> \\\"stage: '3'\\\";\",\n        \"\\\"stage: 'b.dvc'\\\";\",\n        \"strict digraph {\",\n        \"}\",\n    ]\n    actual = sorted(line.rstrip() for line in _show_dot(repo.index.graph).splitlines())\n    assert actual == expected\n\n\ndef test_show_dot_properly_escapes():\n    graph = nx.DiGraph(\n        [\n            (\"evaluate\", \"train🚄\"),  # emoji\n            (\"evaluate\", \"featurize\"),\n            (\"featurize\", \"prepare:1\"),  # colon\n            (\"prepare:1\", \"data/raw/1.dvc\"),  # posix path\n            (\"prepare:1\", \"data\\\\raw\\\\2.dvc\"),  # windows path\n            (\"prepare\", \"4\"),  # just a number\n        ]\n    )\n\n    expected = {\n        \"strict digraph {\",\n        '\"data\\\\raw\\\\2.dvc\";',\n        '\"prepare\";',\n        '\"4\";',\n        '\"data/raw/1.dvc\";',\n        '\"train🚄\";',\n        '\"evaluate\";',\n        '\"prepare:1\";',\n        '\"featurize\";',\n        '\"data\\\\raw\\\\2.dvc\" -> \"prepare:1\";',\n        '\"4\" -> \"prepare\";',\n        '\"data/raw/1.dvc\" -> \"prepare:1\";',\n        '\"train🚄\" -> \"evaluate\";',\n        '\"prepare:1\" -> \"featurize\";',\n        '\"featurize\" -> \"evaluate\";',\n        \"}\",\n    }\n    actual = {line.rstrip() for line in _show_dot(graph).splitlines()}\n    assert actual == expected\n\n\ndef test_show_mermaid(repo):\n    
assert [line.rstrip() for line in _show_mermaid(repo.index.graph).splitlines()] == [\n        \"flowchart TD\",\n        \"\\tnode1[\\\"stage: '1'\\\"]\",\n        \"\\tnode2[\\\"stage: '2'\\\"]\",\n        \"\\tnode3[\\\"stage: '3'\\\"]\",\n        \"\\tnode4[\\\"stage: '4'\\\"]\",\n        \"\\tnode5[\\\"stage: 'a.dvc'\\\"]\",\n        \"\\tnode6[\\\"stage: 'b.dvc'\\\"]\",\n        \"\\tnode3-->node4\",\n        \"\\tnode5-->node1\",\n        \"\\tnode5-->node3\",\n        \"\\tnode5-->node4\",\n        \"\\tnode6-->node2\",\n        \"\\tnode6-->node3\",\n    ]\n\n\ndef test_show_mermaid_markdown(repo, dvc, capsys, mocker):\n    mocker.patch(\"dvc.commands.dag._build\", return_value=dvc.index.graph)\n\n    capsys.readouterr()\n    assert main([\"dag\", \"--md\"]) == 0\n    assert [line.rstrip() for line in capsys.readouterr().out.splitlines()] == [\n        \"```mermaid\",\n        \"flowchart TD\",\n        \"\\tnode1[\\\"stage: '1'\\\"]\",\n        \"\\tnode2[\\\"stage: '2'\\\"]\",\n        \"\\tnode3[\\\"stage: '3'\\\"]\",\n        \"\\tnode4[\\\"stage: '4'\\\"]\",\n        \"\\tnode5[\\\"stage: 'a.dvc'\\\"]\",\n        \"\\tnode6[\\\"stage: 'b.dvc'\\\"]\",\n        \"\\tnode3-->node4\",\n        \"\\tnode5-->node1\",\n        \"\\tnode5-->node3\",\n        \"\\tnode5-->node4\",\n        \"\\tnode6-->node2\",\n        \"\\tnode6-->node3\",\n        \"```\",\n    ]\n"
  },
  {
    "path": "tests/unit/command/test_data_status.py",
    "content": "import json\n\nimport pytest\nfrom funcy import omit\n\nfrom dvc.cli import main, parse_args\nfrom dvc.commands.data import CmdDataStatus\nfrom dvc.repo import Repo\nfrom dvc.repo.data import Status\nfrom tests.func.parsing.test_errors import escape_ansi\n\n\n@pytest.fixture\ndef mocked_status():\n    return Status(\n        not_in_cache=[\"notincache\"],\n        committed={\n            \"added\": [\"dir/bar\", \"dir/foo\"],\n            \"deleted\": [\"dir/baz\"],\n            \"modified\": [\"dir/foobar\"],\n            \"unknown\": [\"dir/unknown1\"],\n            \"renamed\": [{\"old\": \"dir/\", \"new\": \"dir2/\"}],\n        },\n        uncommitted={\n            \"added\": [\"dir/baz\"],\n            \"modified\": [\"dir/bar\"],\n            \"deleted\": [\"dir/foobar\"],\n            \"unknown\": [\"dir2/unknown2\"],\n            \"renamed\": [{\"old\": \"dir2/file-old\", \"new\": \"dir2/file-new\"}],\n        },\n        untracked=[\"untracked\"],\n        unchanged=[\"dir/foo\"],\n        git={\"is_dirty\": True, \"is_empty\": False},\n    )\n\n\ndef test_cli(dvc, mocker, mocked_status):\n    status = mocker.patch(\"dvc.repo.Repo.data_status\", return_value=mocked_status)\n\n    cli_args = parse_args(\n        [\n            \"data\",\n            \"status\",\n            \"--json\",\n            \"--unchanged\",\n            \"--untracked-files\",\n            \"--granular\",\n            \"--remote\",\n            \"myremote\",\n        ]\n    )\n\n    assert cli_args.func == CmdDataStatus\n    cmd = cli_args.func(cli_args)\n    assert cmd.run() == 0\n    status.assert_called_once_with(\n        targets=[],\n        untracked_files=\"all\",\n        not_in_remote=False,\n        remote=\"myremote\",\n        remote_refresh=True,\n        granular=True,\n    )\n\n\n@pytest.mark.parametrize(\n    \"args, to_omit\",\n    [\n        ([], [\"untracked\", \"unchanged\"]),\n        ([\"--unchanged\"], [\"untracked\"]),\n        
([\"--unchanged\", \"--untracked-files\"], []),\n    ],\n)\ndef test_json(dvc, mocker, capsys, mocked_status, args, to_omit):\n    mocker.patch(\"dvc.repo.Repo.data_status\", return_value=mocked_status)\n    assert main([\"data\", \"status\", \"--json\", *args]) == 0\n    out, err = capsys.readouterr()\n    assert out.rstrip() == json.dumps(omit(mocked_status, [*to_omit, \"git\"]))\n    assert not err\n\n\ndef test_no_changes_repo(dvc, scm, capsys):\n    assert main([\"data\", \"status\"]) == 0\n    out, _ = capsys.readouterr()\n    assert out == \"No changes.\\n\"\n\n\ndef test_empty_scm_repo(tmp_dir, capsys):\n    tmp_dir.init(scm=True)\n    Repo.init()\n\n    assert main([\"data\", \"status\"]) == 0\n    out, _ = capsys.readouterr()\n    assert (\n        out\n        == \"\"\"\\\nNo changes in an empty git repo.\n(there are changes not tracked by dvc, use \"git status\" to see)\n\"\"\"\n    )\n\n\n@pytest.mark.parametrize(\n    \"args\",\n    [\n        (\"--untracked-files\",),\n        (\"--unchanged\",),\n        (\"--untracked-files\", \"--unchanged\"),\n    ],\n)\n@pytest.mark.parametrize(\"is_dirty\", [True, False])\ndef test_show_status(dvc, scm, mocker, capsys, mocked_status, args, is_dirty):\n    mocked_status[\"git\"][\"is_dirty\"] = is_dirty\n    mocker.patch(\"dvc.repo.Repo.data_status\", return_value=mocked_status)\n    assert main([\"data\", \"status\", *args]) == 0\n    out, err = capsys.readouterr()\n    expected_out = \"\"\"\\\nNot in cache:\n  (use \"dvc fetch <file>...\" to download files)\n        notincache\n\nDVC committed changes:\n  (git commit the corresponding dvc files to update the repo)\n        added: dir/bar\n        added: dir/foo\n        deleted: dir/baz\n        modified: dir/foobar\n        unknown: dir/unknown1\n        renamed: dir/ -> dir2/\n\nDVC uncommitted changes:\n  (use \"dvc commit <file>...\" to track changes)\n  (use \"dvc checkout <file>...\" to discard changes)\n        added: dir/baz\n        modified: 
dir/bar\n        deleted: dir/foobar\n        unknown: dir2/unknown2\n        renamed: dir2/file-old -> dir2/file-new\n\"\"\"\n    if \"--untracked-files\" in args:\n        expected_out += \"\"\"\nUntracked files:\n  (use \"git add <file> ...\" or \"dvc add <file>...\" to commit to git or to dvc)\n        untracked\n\"\"\"\n    if \"--unchanged\" in args:\n        expected_out += \"\"\"\nDVC unchanged files:\n        dir/foo\n\"\"\"\n\n    if is_dirty:\n        expected_out += \"\"\"\\\n(there are other changes not tracked by dvc, use \"git status\" to see)\n\"\"\"\n    assert escape_ansi(out) == expected_out\n    assert not err\n"
  },
  {
    "path": "tests/unit/command/test_data_sync.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.data_sync import CmdDataFetch, CmdDataPull, CmdDataPush\n\n\ndef test_fetch(mocker, dvc):\n    cli_args = parse_args(\n        [\n            \"fetch\",\n            \"target1\",\n            \"target2\",\n            \"--jobs\",\n            \"2\",\n            \"--remote\",\n            \"remote\",\n            \"--all-branches\",\n            \"--all-tags\",\n            \"--all-commits\",\n            \"--with-deps\",\n            \"--recursive\",\n            \"--run-cache\",\n            \"--max-size\",\n            \"10\",\n            \"--type\",\n            \"plots\",\n            \"--type\",\n            \"metrics\",\n        ]\n    )\n    assert cli_args.func == CmdDataFetch\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"fetch\", autospec=True, return_value=0)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        targets=[\"target1\", \"target2\"],\n        jobs=2,\n        remote=\"remote\",\n        all_branches=True,\n        all_tags=True,\n        all_commits=True,\n        with_deps=True,\n        recursive=True,\n        run_cache=True,\n        max_size=10,\n        types=[\"plots\", \"metrics\"],\n    )\n\n\ndef test_pull(mocker, dvc):\n    cli_args = parse_args(\n        [\n            \"pull\",\n            \"target1\",\n            \"target2\",\n            \"--jobs\",\n            \"2\",\n            \"--remote\",\n            \"remote\",\n            \"--all-branches\",\n            \"--all-tags\",\n            \"--all-commits\",\n            \"--with-deps\",\n            \"--force\",\n            \"--recursive\",\n            \"--run-cache\",\n            \"--glob\",\n            \"--allow-missing\",\n        ]\n    )\n    assert cli_args.func == CmdDataPull\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"pull\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n      
  targets=[\"target1\", \"target2\"],\n        jobs=2,\n        remote=\"remote\",\n        all_branches=True,\n        all_tags=True,\n        all_commits=True,\n        with_deps=True,\n        force=True,\n        recursive=True,\n        run_cache=True,\n        glob=True,\n        allow_missing=True,\n    )\n\n\ndef test_push(mocker, dvc):\n    cli_args = parse_args(\n        [\n            \"push\",\n            \"target1\",\n            \"target2\",\n            \"--jobs\",\n            \"2\",\n            \"--remote\",\n            \"remote\",\n            \"--all-branches\",\n            \"--all-tags\",\n            \"--all-commits\",\n            \"--with-deps\",\n            \"--recursive\",\n            \"--run-cache\",\n            \"--glob\",\n        ]\n    )\n    assert cli_args.func == CmdDataPush\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"push\", autospec=True, return_value=0)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        targets=[\"target1\", \"target2\"],\n        jobs=2,\n        remote=\"remote\",\n        all_branches=True,\n        all_tags=True,\n        all_commits=True,\n        with_deps=True,\n        recursive=True,\n        run_cache=True,\n        glob=True,\n    )\n"
  },
  {
    "path": "tests/unit/command/test_dataset.py",
    "content": "from datetime import datetime, timezone\n\nimport pytest\n\nfrom dvc.cli import main\n\n\n@pytest.mark.parametrize(\n    \"spec,lock,expected_output\",\n    [\n        (\n            {\"name\": \"ds\", \"url\": \"url\", \"type\": \"dvc\", \"path\": \"path\", \"rev\": \"main\"},\n            {\"rev_lock\": \"0\" * 40},\n            \"Adding ds (url:/path @ main)\\n\",\n        ),\n        (\n            {\"name\": \"mydataset\", \"url\": \"dc://dataset\", \"type\": \"dc\"},\n            {\"version\": 1, \"created_at\": datetime.now(tz=timezone.utc)},\n            \"Adding mydataset (dc://dataset @ v1)\\n\",\n        ),\n        (\n            {\"name\": \"mydataset\", \"url\": \"s3://bucket/path\", \"type\": \"url\"},\n            {\n                \"files\": [{\"relpath\": \"foo\", \"meta\": {\"version_id\": 1}}],\n                \"meta\": {\"isdir\": True},\n            },\n            \"Adding mydataset (s3://bucket/path)\\n\",\n        ),\n    ],\n)\ndef test_add(dvc, capsys, mocker, spec, lock, expected_output):\n    dataset = dvc.datasets._build_dataset(\"dvc.yaml\", spec, spec | lock)\n\n    m = mocker.patch(\"dvc.repo.datasets.Datasets.add\", return_value=dataset)\n\n    assert main([\"dataset\", \"add\", spec[\"name\"], f\"--{spec['type']}\", spec[\"url\"]]) == 0\n    out, err = capsys.readouterr()\n    assert out == expected_output\n    assert not err\n    m.assert_called_once()\n\n\ndef test_add_already_exists(dvc, caplog, mocker):\n    spec = {\"name\": \"ds\", \"url\": \"url\", \"type\": \"dvc\"}\n    dataset = dvc.datasets._build_dataset(\"dvc.yaml\", spec, None)\n    mocker.patch(\"dvc.repo.datasets.Datasets.get\", return_value=dataset)\n\n    assert main([\"dataset\", \"add\", \"ds\", \"--dc\", \"dataset\"]) == 255\n    assert \"ds already exists in dvc.yaml, use the --force to overwrite\" in caplog.text\n\n\n@pytest.mark.parametrize(\"lock\", [\"missing\", \"unchanged\", \"updated\"])\n@pytest.mark.parametrize(\n    
\"spec,old_lock,new_lock,expected_outputs\",\n    [\n        (\n            {\"name\": \"ds\", \"url\": \"url\", \"type\": \"dvc\", \"path\": \"path\", \"rev\": \"main\"},\n            {\"rev_lock\": \"0\" * 40},\n            {\"rev_lock\": \"1\" * 40},\n            {\n                \"missing\": \"Updating ds (url:/path @ main)\\n\",\n                \"unchanged\": \"Nothing to update\\n\",\n                \"updated\": \"Updating ds (000000000 -> 111111111)\\n\",\n            },\n        ),\n        (\n            {\"name\": \"mydataset\", \"url\": \"dc://dataset\", \"type\": \"dc\"},\n            {\"version\": 1, \"created_at\": datetime.now(tz=timezone.utc)},\n            {\"version\": 2},\n            {\n                \"missing\": \"Updating mydataset (dc://dataset @ v2)\\n\",\n                \"unchanged\": \"Nothing to update\\n\",\n                \"updated\": \"Updating mydataset (v1 -> v2)\\n\",\n            },\n        ),\n        (\n            {\"name\": \"mydataset\", \"url\": \"dc://dataset\", \"type\": \"dc\"},\n            {\"version\": 2, \"created_at\": datetime.now(tz=timezone.utc)},\n            {\"version\": 1},\n            {\n                \"missing\": \"Updating mydataset (dc://dataset @ v1)\\n\",\n                \"unchanged\": \"Nothing to update\\n\",\n                \"updated\": \"Downgrading mydataset (v2 -> v1)\\n\",\n            },\n        ),\n        (\n            {\"name\": \"mydataset\", \"url\": \"s3://bucket/path\", \"type\": \"url\"},\n            {\n                \"files\": [\n                    {\"relpath\": \"bar\", \"meta\": {\"version_id\": 2}},\n                    {\"relpath\": \"baz\", \"meta\": {\"version_id\": 3}},\n                    {\"relpath\": \"foo\", \"meta\": {\"version_id\": 1}},\n                ],\n                \"meta\": {\"isdir\": True},\n            },\n            {\n                \"files\": [\n                    {\"relpath\": \"bar\", \"meta\": {\"version_id\": 2}},  # unchanged\n     
               {\"relpath\": \"baz\", \"meta\": {\"version_id\": 4}},  # modified\n                    # `foo` deleted\n                    {\"relpath\": \"foobar\", \"meta\": {\"version_id\": 5}},  # new\n                ],\n                \"meta\": {\"isdir\": True},\n            },\n            {\n                \"missing\": \"Updating mydataset (s3://bucket/path)\\n\",\n                \"unchanged\": \"Nothing to update\\n\",\n                \"updated\": (\n                    r\"Updating mydataset (s3://bucket/path)\"\n                    \"\\n\"\n                    \"M\\tbaz\\n\"\n                    \"A\\tfoobar\\n\"\n                    \"D\\tfoo\\n\"\n                ).expandtabs(8),\n            },\n        ),\n    ],\n)\ndef test_update(dvc, capsys, mocker, spec, old_lock, new_lock, expected_outputs, lock):\n    if lock == \"missing\":\n        new_lock = spec | old_lock | new_lock\n        old_lock = None\n    elif lock == \"unchanged\":\n        old_lock = new_lock = spec | old_lock\n    else:\n        old_lock = spec | old_lock\n        new_lock = old_lock | new_lock\n\n    old = dvc.datasets._build_dataset(\"dvc.yaml\", spec, old_lock)\n    new = dvc.datasets._build_dataset(\"dvc.yaml\", spec, new_lock)\n\n    m = mocker.patch(\"dvc.repo.datasets.Datasets.update\", return_value=(old, new))\n    assert main([\"dataset\", \"update\", spec[\"name\"]]) == 0\n    out, err = capsys.readouterr()\n    assert out == expected_outputs[lock]\n    assert not err\n\n    m.assert_called_once()\n"
  },
  {
    "path": "tests/unit/command/test_diff.py",
    "content": "import collections\nimport os\n\nimport pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.diff import _digest, _show_markdown\n\n\n@pytest.mark.parametrize(\n    \"checksum, expected\",\n    [\n        (\"wxyz1234pq\", \"wxyz1234\"),\n        ({\"old\": \"1234567890\", \"new\": \"0987654321\"}, \"12345678..09876543\"),\n    ],\n    ids=[\"str\", \"dict\"],\n)\ndef test_digest(checksum, expected):\n    assert expected == _digest(checksum)\n\n\ndef test_default(mocker, capsys, dvc):\n    args = parse_args([\"diff\"])\n    cmd = args.func(args)\n    diff = {\n        \"added\": [{\"path\": \"file\", \"hash\": \"00000000\"}],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [\n            {\n                \"path\": {\n                    \"old\": os.path.join(\"data\", \"file_old\"),\n                    \"new\": os.path.join(\"data\", \"file_new\"),\n                },\n                \"hash\": \"11111111\",\n            }\n        ],\n        \"not in cache\": [],\n    }\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value=diff)\n\n    assert cmd.run() == 0\n    assert (\n        \"Added:\\n\"\n        \"    file\\n\"\n        \"\\n\"\n        \"Renamed:\\n\"\n        f\"    data{os.path.sep}file_old -> data{os.path.sep}file_new\\n\"\n        \"\\n\"\n        \"files summary: 1 added, 1 renamed\"\n    ) in capsys.readouterr()[0]\n\n\ndef test_show_hash(mocker, capsys, dvc):\n    args = parse_args([\"diff\", \"--show-hash\"])\n    cmd = args.func(args)\n    diff = {\n        \"added\": [],\n        \"deleted\": [\n            {\"path\": os.path.join(\"data\", \"\"), \"hash\": \"XXXXXXXX.dir\"},\n            {\"path\": os.path.join(\"data\", \"foo\"), \"hash\": \"11111111\"},\n            {\"path\": os.path.join(\"data\", \"bar\"), \"hash\": \"00000000\"},\n        ],\n        \"modified\": [\n            {\"path\": \"file2\", \"hash\": {\"old\": \"AAAAAAAA\", \"new\": \"BBBBBBBB\"}},\n            {\"path\": 
\"file1\", \"hash\": {\"old\": \"CCCCCCCC\", \"new\": \"DDDDDDDD\"}},\n        ],\n        \"renamed\": [\n            {\n                \"path\": {\n                    \"old\": os.path.join(\"data\", \"file_old\"),\n                    \"new\": os.path.join(\"data\", \"file_new\"),\n                },\n                \"hash\": \"11111111\",\n            }\n        ],\n        \"not in cache\": [],\n    }\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value=diff)\n    assert cmd.run() == 0\n\n    out, _ = capsys.readouterr()\n    assert (\n        \"Deleted:\\n    XXXXXXXX  \"\n        + os.path.join(\"data\", \"\")\n        + \"\\n    00000000  \"\n        + os.path.join(\"data\", \"bar\")\n        + \"\\n    11111111  \"\n        + os.path.join(\"data\", \"foo\")\n        + \"\\n\\nRenamed:\\n    11111111  \"\n        + os.path.join(\"data\", \"file_old\")\n        + \" -> \"\n        + os.path.join(\"data\", \"file_new\")\n        + \"\\n\"\n        \"\\n\"\n        \"Modified:\\n\"\n        \"    CCCCCCCC..DDDDDDDD  file1\\n\"\n        \"    AAAAAAAA..BBBBBBBB  file2\\n\"\n        \"\\n\"\n        \"files summary: 2 deleted, 1 renamed, 2 modified\"\n    ) in out\n\n\ndef test_show_json(mocker, capsys, dvc):\n    args = parse_args([\"diff\", \"--json\"])\n    cmd = args.func(args)\n    diff = {\n        \"added\": [\n            {\"path\": \"file2\", \"hash\": \"22222222\"},\n            {\"path\": \"file1\", \"hash\": \"11111111\"},\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"not in cache\": [],\n    }\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value=diff)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert '\"added\": [{\"path\": \"file1\"}, {\"path\": \"file2\"}]' in out\n    assert '\"deleted\": []' in out\n    assert '\"modified\": []' in out\n    assert '\"not in cache\": []' in out\n\n\ndef test_show_json_and_hash(mocker, capsys, dvc):\n    args = parse_args([\"diff\", \"--json\", 
\"--show-hash\"])\n    cmd = args.func(args)\n\n    diff = {\n        \"added\": [\n            # py35: maintain a consistent key order for tests purposes\n            collections.OrderedDict([(\"path\", \"file2\"), (\"hash\", \"22222222\")]),\n            collections.OrderedDict([(\"path\", \"file1\"), (\"hash\", \"11111111\")]),\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [\n            {\n                \"path\": {\"old\": \"file_old\", \"new\": \"file_new\"},\n                \"hash\": \"11111111\",\n            }\n        ],\n        \"not in cache\": [],\n    }\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value=diff)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert (\n        '\"added\": [{\"path\": \"file1\", \"hash\": \"11111111\"}, '\n        '{\"path\": \"file2\", \"hash\": \"22222222\"}]' in out\n    )\n    assert '\"deleted\": []' in out\n    assert '\"modified\": []' in out\n    assert (\n        '\"renamed\": [{\"path\": {\"old\": \"file_old\", '\n        '\"new\": \"file_new\"}, \"hash\": \"11111111\"}]' in out\n    )\n    assert '\"not in cache\": []' in out\n\n\ndef test_show_json_hide_missing(mocker, capsys, dvc):\n    args = parse_args([\"diff\", \"--json\", \"--hide-missing\"])\n    cmd = args.func(args)\n    diff = {\n        \"added\": [\n            {\"path\": \"file2\", \"hash\": \"22222222\"},\n            {\"path\": \"file1\", \"hash\": \"11111111\"},\n        ],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [\n            {\n                \"path\": {\"old\": \"file_old\", \"new\": \"file_new\"},\n                \"hash\": \"11111111\",\n            }\n        ],\n        \"not in cache\": [],\n    }\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value=diff)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert '\"added\": [{\"path\": \"file1\"}, {\"path\": \"file2\"}]' in out\n    assert '\"deleted\": []' in 
out\n    assert '\"renamed\": [{\"path\": {\"old\": \"file_old\", \"new\": \"file_new\"}' in out\n    assert '\"modified\": []' in out\n    assert '\"not in cache\": []' not in out\n\n\n@pytest.mark.parametrize(\"show_hash\", [None, True, False])\ndef test_diff_show_markdown_and_hash(mocker, show_hash, dvc):\n    options = [\"diff\", \"--md\"] + ([\"--show-hash\"] if show_hash else [])\n    args = parse_args(options)\n    cmd = args.func(args)\n\n    diff = {}\n    show_hash = show_hash if show_hash else False\n    mock_show_markdown = mocker.patch(\"dvc.commands.diff._show_markdown\")\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value=diff.copy())\n\n    assert cmd.run() == 0\n    mock_show_markdown.assert_called_once_with(diff, show_hash, False)\n\n\n@pytest.mark.parametrize(\n    \"opts\",\n    [\n        [],\n        [\"a_rev\", \"b_rev\"],\n        [\"--targets\", \".\"],\n        [\"--hide-missing\"],\n    ],\n)\n@pytest.mark.parametrize(\n    \"show, expected\",\n    [\n        ([], \"\"),\n        ([\"--json\"], \"{}\"),\n        ([\"--md\"], \"| Status   | Path   |\\n|----------|--------|\"),\n    ],\n)\ndef test_no_changes(mocker, capsys, opts, show, expected, dvc):\n    args = parse_args([\"diff\", *opts, *show])\n    cmd = args.func(args)\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value={})\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert expected == out.strip()\n\n\ndef test_show_markdown(capsys):\n    diff = {\n        \"deleted\": [\n            {\"path\": \"zoo\"},\n            {\"path\": os.path.join(\"data\", \"\")},\n            {\"path\": os.path.join(\"data\", \"foo\")},\n            {\"path\": os.path.join(\"data\", \"bar\")},\n        ],\n        \"modified\": [{\"path\": \"file\"}],\n        \"added\": [{\"path\": \"file\"}],\n        \"renamed\": [{\"path\": {\"old\": \"file_old\", \"new\": \"file_new\"}}],\n        \"not in cache\": [{\"path\": \"file2\"}],\n    }\n\n    _show_markdown(diff)\n    out, _ 
= capsys.readouterr()\n    assert out == (\n        \"| Status       | Path                 |\\n\"\n        \"|--------------|----------------------|\\n\"\n        \"| added        | file                 |\\n\"\n        \"| deleted      | zoo                  |\\n\"\n        f\"| deleted      | data{os.path.sep}                |\\n\"\n        f\"| deleted      | data{os.path.sep}foo             |\\n\"\n        f\"| deleted      | data{os.path.sep}bar             |\\n\"\n        \"| renamed      | file_old -> file_new |\\n\"\n        \"| modified     | file                 |\\n\"\n        \"| not in cache | file2                |\\n\"\n        \"\\n\"\n    )\n\n\ndef test_show_markdown_with_hash(capsys):\n    diff = {\n        \"deleted\": [\n            {\"path\": \"zoo\", \"hash\": \"22222\"},\n            {\"path\": os.path.join(\"data\", \"\"), \"hash\": \"XXXXXXXX.dir\"},\n            {\"path\": os.path.join(\"data\", \"foo\"), \"hash\": \"11111111\"},\n            {\"path\": os.path.join(\"data\", \"bar\"), \"hash\": \"00000000\"},\n        ],\n        \"modified\": [{\"path\": \"file\", \"hash\": {\"old\": \"AAAAAAAA\", \"new\": \"BBBBBBBB\"}}],\n        \"added\": [{\"path\": \"file\", \"hash\": \"00000000\"}],\n        \"renamed\": [\n            {\n                \"path\": {\"old\": \"file_old\", \"new\": \"file_new\"},\n                \"hash\": \"11111111\",\n            }\n        ],\n        \"not in cache\": [{\"path\": \"file2\", \"hash\": \"12345678\"}],\n    }\n\n    _show_markdown(diff, show_hash=True)\n\n    out, _ = capsys.readouterr()\n    assert out == (\n        \"| Status       | Hash               | Path                 |\\n\"\n        \"|--------------|--------------------|----------------------|\\n\"\n        \"| added        | 00000000           | file                 |\\n\"\n        \"| deleted      | 22222              | zoo                  |\\n\"\n        f\"| deleted      | XXXXXXXX           | data{os.path.sep}                
|\\n\"\n        f\"| deleted      | 11111111           | data{os.path.sep}foo             |\\n\"\n        f\"| deleted      | 00000000           | data{os.path.sep}bar             |\\n\"\n        \"| renamed      | 11111111           | file_old -> file_new |\\n\"\n        \"| modified     | AAAAAAAA..BBBBBBBB | file                 |\\n\"\n        \"| not in cache | 12345678           | file2                |\\n\"\n        \"\\n\"\n    )\n\n\ndef test_show_markdown_hide_missing(capsys):\n    diff = {\n        \"deleted\": [\n            {\"path\": \"zoo\"},\n            {\"path\": os.path.join(\"data\", \"\")},\n            {\"path\": os.path.join(\"data\", \"foo\")},\n            {\"path\": os.path.join(\"data\", \"bar\")},\n        ],\n        \"modified\": [{\"path\": \"file\"}],\n        \"added\": [{\"path\": \"file\"}],\n        \"renamed\": [{\"path\": {\"old\": \"file_old\", \"new\": \"file_new\"}}],\n        \"not in cache\": [{\"path\": \"file2\"}],\n    }\n\n    _show_markdown(diff, hide_missing=True)\n\n    out, _ = capsys.readouterr()\n    assert out == (\n        \"| Status   | Path                 |\\n\"\n        \"|----------|----------------------|\\n\"\n        \"| added    | file                 |\\n\"\n        \"| deleted  | zoo                  |\\n\"\n        f\"| deleted  | data{os.path.sep}                |\\n\"\n        f\"| deleted  | data{os.path.sep}foo             |\\n\"\n        f\"| deleted  | data{os.path.sep}bar             |\\n\"\n        \"| renamed  | file_old -> file_new |\\n\"\n        \"| modified | file                 |\\n\"\n        \"\\n\"\n    )\n\n\ndef test_hide_missing(mocker, capsys, dvc):\n    args = parse_args([\"diff\", \"--hide-missing\"])\n    cmd = args.func(args)\n    diff = {\n        \"added\": [{\"path\": \"file\", \"hash\": \"00000000\"}],\n        \"deleted\": [],\n        \"modified\": [],\n        \"renamed\": [\n            {\n                \"path\": {\"old\": \"file_old\", \"new\": \"file_new\"},\n   
             \"hash\": \"11111111\",\n            }\n        ],\n        \"not in cache\": [],\n    }\n    mocker.patch(\"dvc.repo.Repo.diff\", return_value=diff)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert (\n        \"Added:\\n\"\n        \"    file\\n\"\n        \"\\n\"\n        \"Renamed:\\n\"\n        \"    file_old -> file_new\\n\"\n        \"\\n\"\n        \"files summary: 1 added, 1 renamed\" in out\n    )\n    assert \"not in cache\" not in out\n"
  },
  {
    "path": "tests/unit/command/test_du.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.du import CmdDU\n\n\ndef test_du(mocker):\n    cli_args = parse_args([\"du\", \"myurl\", \"mypath\", \"--summarize\", \"--rev\", \"myrev\"])\n    assert cli_args.func == CmdDU\n\n    cmd = cli_args.func(cli_args)\n    mock_du = mocker.patch(\"dvc.repo.Repo.du\")\n\n    assert cmd.run() == 0\n    mock_du.assert_called_once_with(\n        \"myurl\",\n        \"mypath\",\n        rev=\"myrev\",\n        summarize=True,\n        config=None,\n        remote=None,\n        remote_config=None,\n    )\n"
  },
  {
    "path": "tests/unit/command/test_experiments.py",
    "content": "import pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.experiments.apply import CmdExperimentsApply\nfrom dvc.commands.experiments.branch import CmdExperimentsBranch\nfrom dvc.commands.experiments.clean import CmdExperimentsClean\nfrom dvc.commands.experiments.diff import CmdExperimentsDiff\nfrom dvc.commands.experiments.ls import CmdExperimentsList\nfrom dvc.commands.experiments.pull import CmdExperimentsPull\nfrom dvc.commands.experiments.push import CmdExperimentsPush\nfrom dvc.commands.experiments.remove import CmdExperimentsRemove\nfrom dvc.commands.experiments.rename import CmdExperimentsRename\nfrom dvc.commands.experiments.run import CmdExperimentsRun\nfrom dvc.commands.experiments.save import CmdExperimentsSave\nfrom dvc.commands.experiments.show import CmdExperimentsShow\nfrom dvc.exceptions import InvalidArgumentError\n\nfrom .test_repro import common_arguments as repro_arguments\n\n\ndef test_experiments_apply(dvc, scm, mocker):\n    cli_args = parse_args([\"experiments\", \"apply\", \"exp_rev\"])\n    assert cli_args.func == CmdExperimentsApply\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.apply.apply\", return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(cmd.repo, \"exp_rev\")\n\n\ndef test_experiments_diff(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"diff\",\n            \"HEAD~10\",\n            \"HEAD~1\",\n            \"--all\",\n            \"--param-deps\",\n            \"--json\",\n            \"--md\",\n            \"--precision\",\n            \"10\",\n        ]\n    )\n    assert cli_args.func == CmdExperimentsDiff\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.diff.diff\", return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo, a_rev=\"HEAD~10\", b_rev=\"HEAD~1\", all=True, param_deps=True\n    )\n\n\ndef 
test_experiments_diff_revs(mocker, capsys, dvc, scm):\n    mocker.patch(\n        \"dvc.repo.experiments.diff.diff\",\n        return_value={\n            \"params\": {\"params.yaml\": {\"foo\": {\"diff\": 1, \"old\": 1, \"new\": 2}}},\n            \"metrics\": {\"metrics.yaml\": {\"foo\": {\"diff\": 1, \"old\": 1, \"new\": 2}}},\n        },\n    )\n\n    cli_args = parse_args([\"exp\", \"diff\", \"exp_a\", \"exp_b\"])\n    cmd = cli_args.func(cli_args)\n\n    capsys.readouterr()\n    assert cmd.run() == 0\n    cap = capsys.readouterr()\n    assert \"exp_a\" in cap.out\n    assert \"exp_b\" in cap.out\n\n\ndef test_experiments_show(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"show\",\n            \"--all-tags\",\n            \"--all-branches\",\n            \"--all-commits\",\n            \"--hide-queued\",\n            \"--hide-failed\",\n            \"--hide-workspace\",\n            \"--sha\",\n            \"--param-deps\",\n            \"-n\",\n            \"1\",\n            \"--rev\",\n            \"foo\",\n            \"--force\",\n        ]\n    )\n    assert cli_args.func == CmdExperimentsShow\n\n    cmd = cli_args.func(cli_args)\n\n    m = mocker.patch(\"dvc.repo.experiments.show.show\", return_value={})\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo,\n        all_tags=True,\n        all_branches=True,\n        all_commits=True,\n        hide_queued=True,\n        hide_failed=True,\n        hide_workspace=True,\n        num=1,\n        revs=[\"foo\"],\n        sha_only=True,\n        param_deps=True,\n        fetch_running=True,\n        force=True,\n    )\n\n\ndef test_experiments_run(dvc, scm, mocker):\n    default_arguments = {\n        \"params\": [],\n        \"name\": None,\n        \"queue\": False,\n        \"run_all\": False,\n        \"jobs\": 1,\n        \"tmp_dir\": False,\n        \"copy_paths\": [],\n        \"message\": None,\n        \"no_hydra\": 
False,\n    }\n    default_arguments.update(repro_arguments)\n\n    cmd = CmdExperimentsRun(parse_args([\"exp\", \"run\"]))\n    mocker.patch.object(cmd.repo, \"reproduce\")\n    mocker.patch.object(cmd.repo.experiments, \"run\")\n    cmd.run()\n    cmd.repo.experiments.run.assert_called_with(**default_arguments)\n\n\n@pytest.mark.parametrize(\"flag\", [\"-m\", \"-M\", \"--message\"])\ndef test_experiments_run_message(dvc, scm, mocker, flag):\n    default_arguments = {\n        \"params\": [],\n        \"name\": None,\n        \"queue\": False,\n        \"run_all\": False,\n        \"jobs\": 1,\n        \"tmp_dir\": False,\n        \"copy_paths\": [],\n        \"message\": \"mymessage\",\n        \"no_hydra\": False,\n    }\n    default_arguments.update(repro_arguments)\n\n    cmd = CmdExperimentsRun(parse_args([\"exp\", \"run\", flag, \"mymessage\"]))\n    mocker.patch.object(cmd.repo, \"reproduce\")\n    mocker.patch.object(cmd.repo.experiments, \"run\")\n    cmd.run()\n    cmd.repo.experiments.run.assert_called_with(**default_arguments)\n\n\ndef test_experiments_branch(dvc, scm, mocker):\n    m = mocker.patch(\"dvc.repo.experiments.branch.branch\", return_value={})\n\n    cli_args = parse_args([\"experiments\", \"branch\", \"expname\"])\n    assert cli_args.func == CmdExperimentsBranch\n\n    cmd = cli_args.func(cli_args)\n    assert cmd.run() == 0\n\n    m.assert_called_with(cmd.repo, \"expname\", None)\n\n    cli_args = parse_args([\"experiments\", \"branch\", \"expname\", \"branchname\"])\n    cmd = cli_args.func(cli_args)\n    assert cmd.run() == 0\n\n    m.assert_called_with(cmd.repo, \"expname\", \"branchname\")\n\n\ndef test_experiments_list(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"list\",\n            \"origin\",\n            \"--all-commits\",\n            \"-n\",\n            \"-1\",\n            \"--rev\",\n            \"foo\",\n            \"--name-only\",\n        ]\n    )\n    assert 
cli_args.func == CmdExperimentsList\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.ls.ls\", return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo,\n        git_remote=\"origin\",\n        rev=[\"foo\"],\n        all_commits=True,\n        num=-1,\n    )\n\n\n@pytest.mark.parametrize(\n    \"args,expected\",\n    [\n        ([], \"master:\\n\\tsha-a [exp-a]\\n\"),\n        ([\"--name-only\"], \"exp-a\\n\"),\n        ([\"--sha-only\"], \"sha-a\\n\"),\n    ],\n)\ndef test_experiments_list_format(mocker, capsys, args, expected, dvc, scm):\n    mocker.patch(\n        \"dvc.repo.experiments.ls.ls\",\n        return_value={\n            scm.get_rev(): [\n                (\"exp-a\", \"sha-a\"),\n            ]\n        },\n    )\n    raw_args = [\"experiments\", \"list\", *args]\n    cli_args = parse_args(raw_args)\n\n    cmd = cli_args.func(cli_args)\n\n    capsys.readouterr()\n    assert cmd.run() == 0\n    cap = capsys.readouterr()\n    assert cap.out == expected\n\n\ndef test_experiments_list_remote(mocker, capsys, dvc, scm):\n    mocker.patch(\n        \"dvc.repo.experiments.ls.ls\",\n        return_value={\n            \"main\": [\n                (\"exp-a\", None),\n            ]\n        },\n    )\n    cli_args = parse_args([\"experiments\", \"list\", \"git_remote\"])\n\n    cmd = cli_args.func(cli_args)\n\n    capsys.readouterr()\n    assert cmd.run() == 0\n    cap = capsys.readouterr()\n    assert cap.out == \"main:\\n\\texp-a\\n\"\n\n    cli_args = parse_args([\"experiments\", \"list\", \"git_remote\", \"--sha-only\"])\n\n    cmd = cli_args.func(cli_args)\n\n    capsys.readouterr()\n\n    with pytest.raises(InvalidArgumentError):\n        cmd.run()\n\n\ndef test_experiments_push(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"push\",\n            \"origin\",\n            \"experiment1\",\n            \"experiment2\",\n            
\"--all-commits\",\n            \"-n\",\n            \"2\",\n            \"--rev\",\n            \"foo\",\n            \"--force\",\n            \"--no-cache\",\n            \"--remote\",\n            \"my-remote\",\n            \"--jobs\",\n            \"1\",\n            \"--run-cache\",\n        ]\n    )\n    assert cli_args.func == CmdExperimentsPush\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.push.push\", return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo,\n        \"origin\",\n        [\"experiment1\", \"experiment2\"],\n        rev=[\"foo\"],\n        all_commits=True,\n        num=2,\n        force=True,\n        push_cache=False,\n        dvc_remote=\"my-remote\",\n        jobs=1,\n        run_cache=True,\n    )\n\n    cli_args = parse_args([\"experiments\", \"push\", \"origin\"])\n    cmd = cli_args.func(cli_args)\n\n    assert cmd.run() == 0\n\n\ndef test_experiments_pull(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"pull\",\n            \"origin\",\n            \"experiment\",\n            \"--all-commits\",\n            \"--rev\",\n            \"foo\",\n            \"--force\",\n            \"--no-cache\",\n            \"--remote\",\n            \"my-remote\",\n            \"--jobs\",\n            \"1\",\n            \"--run-cache\",\n        ]\n    )\n    assert cli_args.func == CmdExperimentsPull\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.pull.pull\", return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo,\n        \"origin\",\n        [\"experiment\"],\n        rev=[\"foo\"],\n        all_commits=True,\n        num=1,\n        force=True,\n        pull_cache=False,\n        dvc_remote=\"my-remote\",\n        jobs=1,\n        run_cache=True,\n    )\n\n    cli_args = parse_args([\"experiments\", \"pull\", \"origin\"])\n    cmd = 
cli_args.func(cli_args)\n\n    assert cmd.run() == 0\n\n\ndef test_experiments_remove_flag(dvc, scm, mocker, capsys, caplog):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"remove\",\n            \"--all-commits\",\n            \"--rev\",\n            \"foo\",\n            \"--num\",\n            \"2\",\n            \"--git-remote\",\n            \"myremote\",\n        ]\n    )\n    assert cli_args.func == CmdExperimentsRemove\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.remove.remove\", return_value={})\n    assert cmd.run() == 0\n    m.assert_called_once_with(\n        cmd.repo,\n        exp_names=[],\n        all_commits=True,\n        rev=[\"foo\"],\n        num=2,\n        queue=False,\n        git_remote=\"myremote\",\n        keep=False,\n    )\n\n\ndef test_experiments_remove_special(dvc, scm, mocker, capsys, caplog):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"remove\",\n            \"--git-remote\",\n            \"myremote\",\n            \"exp-123\",\n            \"exp-234\",\n        ]\n    )\n    assert cli_args.func == CmdExperimentsRemove\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.remove.remove\", return_value={})\n    assert cmd.run() == 0\n    m.assert_called_once_with(\n        cmd.repo,\n        exp_names=[\"exp-123\", \"exp-234\"],\n        all_commits=False,\n        rev=None,\n        num=1,\n        queue=False,\n        git_remote=\"myremote\",\n        keep=False,\n    )\n\n\ndef test_experiments_remove_invalid(dvc, scm, mocker, capsys, caplog):\n    cmd = CmdExperimentsRemove(\n        parse_args([\"exp\", \"remove\", \"--all-commits\", \"exp-1\", \"exp-2\"])\n    )\n    with pytest.raises(InvalidArgumentError):\n        cmd.run()\n    cmd = CmdExperimentsRemove(parse_args([\"exp\", \"remove\"]))\n    with pytest.raises(InvalidArgumentError) as excinfo:\n        cmd.run()\n    assert (\n       
 str(excinfo.value) == \"Either provide an `experiment` argument\"\n        \", or use the `--rev` or `--all-commits` or `--queue` flag.\"\n    )\n\n\ndef test_experiments_rename_flag(dvc, scm, mocker, capsys, caplog):\n    cli_args = parse_args(\n        [\n            \"experiments\",\n            \"rename\",\n            \"--git-remote\",\n            \"myremote\",\n            \"exp-123\",\n            \"exp-234\",\n        ]\n    )\n    assert cli_args.func == CmdExperimentsRename\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.rename.rename\", return_value={})\n    assert cmd.run() == 0\n    m.assert_called_once_with(\n        cmd.repo,\n        exp_name=\"exp-123\",\n        new_name=\"exp-234\",\n        git_remote=\"myremote\",\n        force=False,\n    )\n\n\ndef test_experiments_rename_invalid(dvc, scm, mocker, capsys, caplog):\n    cmd = CmdExperimentsRename(parse_args([\"exp\", \"rename\", \"exp-1\"]))\n    with pytest.raises(InvalidArgumentError) as excinfo:\n        cmd.run()\n    assert (\n        str(excinfo.value)\n        == \"An experiment to rename and a new experiment name are required.\"\n    )\n\n\ndef test_experiments_save(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\"exp\", \"save\", \"target\", \"--name\", \"exp-name\", \"--recursive\", \"--force\"]\n    )\n    assert cli_args.func == CmdExperimentsSave\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.save.save\", return_value=\"acabb\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo,\n        targets=[\"target\"],\n        name=\"exp-name\",\n        recursive=True,\n        force=True,\n        include_untracked=[],\n        message=None,\n    )\n\n\n@pytest.mark.parametrize(\"flag\", [\"-m\", \"-M\", \"--message\"])\ndef test_experiments_save_message(dvc, scm, mocker, flag):\n    cli_args = parse_args([\"exp\", \"save\", flag, \"custom commit message\"])\n    assert 
cli_args.func == CmdExperimentsSave\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.save.save\", return_value=\"acabb\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo,\n        targets=[],\n        name=None,\n        recursive=False,\n        force=False,\n        include_untracked=[],\n        message=\"custom commit message\",\n    )\n\n\ndef test_experiments_clean(dvc, scm, mocker):\n    cli_args = parse_args([\"experiments\", \"clean\"])\n    assert cli_args.func == CmdExperimentsClean\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.clean.clean\", return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(cmd.repo)\n"
  },
  {
    "path": "tests/unit/command/test_gc.py",
    "content": "import pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.gc import CmdGC\nfrom dvc.exceptions import InvalidArgumentError\n\n\ndef test_(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"gc\",\n            \"--workspace\",\n            \"--all-tags\",\n            \"--all-branches\",\n            \"--all-commits\",\n            \"--all-experiments\",\n            \"--date\",\n            \"2022-06-30\",\n            \"--cloud\",\n            \"--remote\",\n            \"origin\",\n            \"--force\",\n            \"--jobs\",\n            \"3\",\n            \"--dry\",\n            \"--projects\",\n            \"project1\",\n            \"project2\",\n            \"--skip-failed\",\n        ]\n    )\n    assert cli_args.func == CmdGC\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.gc\", return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        workspace=True,\n        all_tags=True,\n        all_branches=True,\n        all_commits=True,\n        all_experiments=True,\n        commit_date=\"2022-06-30\",\n        cloud=True,\n        remote=\"origin\",\n        force=True,\n        jobs=3,\n        repos=[\"project1\", \"project2\"],\n        rev=None,\n        num=None,\n        not_in_remote=False,\n        dry=True,\n        skip_failed=True,\n    )\n\n    cli_args = parse_args([\"gc\"])\n    cmd = cli_args.func(cli_args)\n    with pytest.raises(InvalidArgumentError):\n        cmd.run()\n\n    cli_args = parse_args([\"gc\", \"--num\", \"2\"])\n    cmd = cli_args.func(cli_args)\n    with pytest.raises(InvalidArgumentError):\n        cmd.run()\n\n    cli_args = parse_args([\"gc\", \"--all-branches\", \"--num\", \"2\", \"--force\"])\n    cmd = cli_args.func(cli_args)\n\n    assert cmd.run() == 0\n\n    m.assert_called_with(\n        workspace=False,\n        all_tags=False,\n        all_branches=True,\n        all_commits=False,\n        
all_experiments=False,\n        commit_date=None,\n        cloud=False,\n        remote=None,\n        force=True,\n        jobs=None,\n        repos=None,\n        rev=None,\n        num=2,\n        not_in_remote=False,\n        dry=False,\n        skip_failed=False,\n    )\n\n    cli_args = parse_args([\"gc\", \"--cloud\", \"--not-in-remote\"])\n    cmd = cli_args.func(cli_args)\n    with pytest.raises(InvalidArgumentError):\n        cmd.run()\n\n    cli_args = parse_args([\"gc\", \"--remote\", \"myremote\"])\n    cmd = cli_args.func(cli_args)\n    with pytest.raises(InvalidArgumentError):\n        cmd.run()\n"
  },
  {
    "path": "tests/unit/command/test_get.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.get import CmdGet\n\n\ndef test_get(mocker):\n    cli_args = parse_args(\n        [\n            \"get\",\n            \"repo_url\",\n            \"src\",\n            \"--out\",\n            \"out\",\n            \"--rev\",\n            \"version\",\n            \"--jobs\",\n            \"4\",\n            \"--config\",\n            \"myconfig\",\n            \"--remote\",\n            \"myremote\",\n            \"--remote-config\",\n            \"k1=v1\",\n            \"k2=v2\",\n        ]\n    )\n    assert cli_args.func == CmdGet\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.get\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"repo_url\",\n        path=\"src\",\n        out=\"out\",\n        rev=\"version\",\n        jobs=4,\n        config=\"myconfig\",\n        force=False,\n        remote=\"myremote\",\n        remote_config={\"k1\": \"v1\", \"k2\": \"v2\"},\n    )\n\n\ndef test_get_url(mocker, capsys):\n    cli_args = parse_args(\n        [\n            \"get\",\n            \"repo_url\",\n            \"src\",\n            \"--rev\",\n            \"version\",\n            \"--remote\",\n            \"myremote\",\n            \"--show-url\",\n            \"--remote-config\",\n            \"k1=v1\",\n            \"k2=v2\",\n        ]\n    )\n    assert cli_args.func == CmdGet\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.api.get_url\", return_value=\"resource_url\")\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert \"resource_url\" in out\n\n    m.assert_called_once_with(\n        \"src\",\n        repo=\"repo_url\",\n        rev=\"version\",\n        remote=\"myremote\",\n        remote_config={\"k1\": \"v1\", \"k2\": \"v2\"},\n    )\n"
  },
  {
    "path": "tests/unit/command/test_get_url.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.get_url import CmdGetUrl\nfrom dvc.config import Config\nfrom dvc.testing import matchers as M\n\n\ndef test_get_url(mocker):\n    cli_args = parse_args([\"get-url\", \"src\", \"out\", \"-j\", \"5\"])\n    assert cli_args.func == CmdGetUrl\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.get_url\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"src\",\n        out=\"out\",\n        jobs=5,\n        force=False,\n        fs_config=None,\n        config=M.instance_of(Config),\n    )\n"
  },
  {
    "path": "tests/unit/command/test_git_hook.py",
    "content": "import pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.git_hook import CmdPostCheckout, CmdPreCommit, CmdPrePush\n\n\n@pytest.mark.parametrize(\n    \"hook, cls\",\n    [\n        (\"pre-commit\", CmdPreCommit),\n        (\"post-checkout\", CmdPostCheckout),\n        (\"pre-push\", CmdPrePush),\n    ],\n)\ndef test_out_of_repo(tmp_dir, hook, cls, mocker):\n    cli_args = parse_args([\"git-hook\", hook])\n    assert cli_args.func == cls\n    cmd = cli_args.func(cli_args)\n    mock_main = mocker.patch(\"dvc.cli.main\")\n    assert cmd.run() == 0\n    assert not mock_main.called\n"
  },
  {
    "path": "tests/unit/command/test_help.py",
    "content": "import logging\nimport re\nfrom argparse import SUPPRESS, ArgumentParser\nfrom itertools import takewhile\n\nimport pytest\nimport shtab\n\nfrom dvc.cli import main\nfrom dvc.cli.parser import get_main_parser\n\n\ndef command_tuples():\n    root: tuple[str, ...] = ()\n    commands = [root]\n\n    def recurse_parser(parser: ArgumentParser, parents: tuple[str, ...] = root) -> None:\n        for positional in parser._get_positional_actions():\n            if positional.help != SUPPRESS and isinstance(positional.choices, dict):\n                public_cmds = shtab.get_public_subcommands(positional)\n                for subcmd, subparser in positional.choices.items():\n                    cmd = (*parents, subcmd)\n                    if subcmd in public_cmds:\n                        commands.append(cmd)\n                    recurse_parser(subparser, cmd)\n\n    main_parser = get_main_parser()\n    recurse_parser(main_parser)\n\n    # the no. of commands will usually go up,\n    # but if we ever remove commands and drop below, adjust the magic number accordingly\n    assert len(commands) >= 116\n    return sorted(commands)\n\n\ndef ids(values):\n    return \"-\".join(values) or \"dvc\"\n\n\n@pytest.mark.parametrize(\"command_tuples\", command_tuples(), ids=ids)\ndef test_help(caplog, capsys, command_tuples):\n    with caplog.at_level(logging.INFO), pytest.raises(SystemExit) as e:\n        main([*command_tuples, \"--help\"])\n    assert e.value.code == 0\n    assert not caplog.text\n\n    out, err = capsys.readouterr()\n\n    # validate metavars are all in lowercase\n    usage = \"\\n\".join(takewhile(lambda o: bool(o), out.splitlines()))\n\n    message = (\n        \"metavars not lowercased, you are likely missing formatter_class=XXX \"\n        \"in the command, where XXX should be any of the classes from \"\n        \"`dvc.cli.formatter`, which automatically lowercases metavars.\\n\"\n        \"\\nExample:\\n\"\n        \"\\nfrom dvc.cli import 
formatter\\n\"\n        \"\\nparser.add_parser(..., formatter_class=formatter.TextHelpFormatter)\\n\"\n    )\n    assert not re.findall(r\"\\b[A-Z]{2,}\\b\", usage, re.MULTILINE), message\n\n    assert not err\n    assert out\n"
  },
  {
    "path": "tests/unit/command/test_imp.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.imp import CmdImport\n\n\ndef test_import(mocker, dvc):\n    cli_args = parse_args(\n        [\n            \"import\",\n            \"repo_url\",\n            \"src\",\n            \"--out\",\n            \"out\",\n            \"--rev\",\n            \"version\",\n            \"--jobs\",\n            \"3\",\n            \"--config\",\n            \"myconfig\",\n            \"--remote\",\n            \"myremote\",\n            \"--remote-config\",\n            \"k1=v1\",\n            \"k2=v2\",\n        ]\n    )\n    assert cli_args.func == CmdImport\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"imp\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"repo_url\",\n        path=\"src\",\n        out=\"out\",\n        rev=\"version\",\n        no_exec=False,\n        no_download=False,\n        jobs=3,\n        config=\"myconfig\",\n        remote=\"myremote\",\n        remote_config={\"k1\": \"v1\", \"k2\": \"v2\"},\n        force=False,\n    )\n\n\ndef test_import_no_exec(mocker, dvc):\n    cli_args = parse_args(\n        [\n            \"import\",\n            \"repo_url\",\n            \"src\",\n            \"--out\",\n            \"out\",\n            \"--rev\",\n            \"version\",\n            \"--no-exec\",\n        ]\n    )\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"imp\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"repo_url\",\n        path=\"src\",\n        out=\"out\",\n        rev=\"version\",\n        no_exec=True,\n        no_download=False,\n        jobs=None,\n        config=None,\n        remote=None,\n        remote_config=None,\n        force=False,\n    )\n\n\ndef test_import_no_download(mocker, dvc):\n    cli_args = parse_args(\n        [\n            \"import\",\n            \"repo_url\",\n            \"src\",\n            
\"--out\",\n            \"out\",\n            \"--rev\",\n            \"version\",\n            \"--no-download\",\n        ]\n    )\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"imp\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"repo_url\",\n        path=\"src\",\n        out=\"out\",\n        rev=\"version\",\n        no_exec=False,\n        no_download=True,\n        jobs=None,\n        config=None,\n        remote=None,\n        remote_config=None,\n        force=False,\n    )\n"
  },
  {
    "path": "tests/unit/command/test_imp_url.py",
    "content": "import logging\n\nimport pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.imp_url import CmdImportUrl\nfrom dvc.exceptions import DvcException\n\n\ndef test_import_url(mocker, dvc):\n    cli_args = parse_args([\"import-url\", \"src\", \"out\", \"--jobs\", \"4\"])\n    assert cli_args.func == CmdImportUrl\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"imp_url\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"src\",\n        out=\"out\",\n        no_exec=False,\n        no_download=False,\n        remote=None,\n        to_remote=False,\n        jobs=4,\n        force=False,\n        version_aware=False,\n        fs_config=None,\n    )\n\n\ndef test_failed_import_url(mocker, caplog, dvc):\n    cli_args = parse_args([\"import-url\", \"http://somesite.com/file_name\"])\n    assert cli_args.func == CmdImportUrl\n\n    cmd = cli_args.func(cli_args)\n    mocker.patch.object(cmd.repo, \"imp_url\", side_effect=DvcException(\"error\"))\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert cmd.run() == 1\n        expected_error = (\n            \"failed to import http://somesite.com/file_name. 
\"\n            \"You could also try downloading it manually, and \"\n            \"adding it with `dvc add`.\"\n        )\n        assert expected_error in caplog.text\n\n\n@pytest.mark.parametrize(\n    \"flag,expected\",\n    [\n        (\"--no-exec\", {\"no_exec\": True, \"no_download\": False}),\n        (\"--no-download\", {\"no_download\": True, \"no_exec\": False}),\n    ],\n)\ndef test_import_url_no_exec_download_flags(mocker, flag, expected, dvc):\n    cli_args = parse_args([\"import-url\", flag, \"src\", \"out\"])\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"imp_url\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"src\",\n        out=\"out\",\n        remote=None,\n        to_remote=False,\n        jobs=None,\n        force=False,\n        version_aware=False,\n        fs_config=None,\n        **expected,\n    )\n\n\ndef test_import_url_to_remote(mocker, dvc):\n    cli_args = parse_args(\n        [\n            \"import-url\",\n            \"s3://bucket/foo\",\n            \"bar\",\n            \"--to-remote\",\n            \"--remote\",\n            \"remote\",\n        ]\n    )\n    assert cli_args.func == CmdImportUrl\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"imp_url\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"s3://bucket/foo\",\n        out=\"bar\",\n        no_exec=False,\n        no_download=False,\n        remote=\"remote\",\n        to_remote=True,\n        jobs=None,\n        force=False,\n        version_aware=False,\n        fs_config=None,\n    )\n\n\n@pytest.mark.parametrize(\"flag\", [\"--no-exec\", \"--no-download\", \"--version-aware\"])\ndef test_import_url_to_remote_invalid_combination(dvc, mocker, caplog, flag):\n    cli_args = parse_args(\n        [\n            \"import-url\",\n            \"s3://bucket/foo\",\n            \"bar\",\n            \"--to-remote\",\n            
\"--remote\",\n            \"remote\",\n            flag,\n        ]\n    )\n    assert cli_args.func == CmdImportUrl\n\n    cmd = cli_args.func(cli_args)\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert cmd.run() == 1\n        expected_msg = (\n            \"--no-exec/--no-download/--version-aware cannot be combined with \"\n            \"--to-remote\"\n        )\n        assert expected_msg in caplog.text\n\n\ndef test_import_url_to_remote_flag(dvc, mocker, caplog):\n    cli_args = parse_args(\n        [\"import-url\", \"s3://bucket/foo\", \"bar\", \"--remote\", \"remote\"]\n    )\n\n    cmd = cli_args.func(cli_args)\n    with caplog.at_level(logging.ERROR, logger=\"dvc\"):\n        assert cmd.run() == 1\n        expected_msg = \"--remote can't be used without --to-remote\"\n        assert expected_msg in caplog.text\n"
  },
  {
    "path": "tests/unit/command/test_ls_url.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.ls_url import CmdListUrl\nfrom dvc.config import Config\nfrom dvc.fs import LocalFileSystem\nfrom dvc.testing import matchers as M\n\n\ndef test_ls_url(mocker):\n    cli_args = parse_args([\"ls-url\", \"src\"])\n    assert cli_args.func == CmdListUrl\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.ls_url\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"src\",\n        recursive=False,\n        maxdepth=None,\n        fs_config=None,\n        config=M.instance_of(Config),\n    )\n\n\ndef test_recursive(mocker):\n    cli_args = parse_args([\"ls-url\", \"-R\", \"-L\", \"2\", \"src\"])\n    assert cli_args.func == CmdListUrl\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.ls_url\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        \"src\", recursive=True, maxdepth=2, fs_config=None, config=M.instance_of(Config)\n    )\n\n\ndef test_tree(mocker):\n    cli_args = parse_args([\"ls-url\", \"--tree\", \"--level\", \"2\", \"src\"])\n    assert cli_args.func == CmdListUrl\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.ls._ls_tree\", autospec=True)\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(M.instance_of(LocalFileSystem), \"src\", maxdepth=2)\n"
  },
  {
    "path": "tests/unit/command/test_metrics.py",
    "content": "import json\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.metrics import CmdMetricsDiff, CmdMetricsShow\n\n\ndef test_metrics_diff(dvc, mocker, capsys):\n    cli_args = parse_args(\n        [\n            \"metrics\",\n            \"diff\",\n            \"HEAD~10\",\n            \"HEAD~1\",\n            \"--all\",\n            \"--md\",\n            \"--targets\",\n            \"target1\",\n            \"target2\",\n            \"--no-path\",\n        ]\n    )\n\n    assert cli_args.func == CmdMetricsDiff\n\n    cmd = cli_args.func(cli_args)\n    diff = {\n        \"diff\": {\"metrics.yaml\": {\"\": {\"old\": 1, \"new\": 3}}},\n        \"errors\": {\"workspace\": Exception},\n    }\n    metrics_diff = mocker.patch(\"dvc.repo.metrics.diff.diff\", return_value=diff)\n    show_diff_mock = mocker.patch(\"dvc.compare.show_diff\")\n\n    assert cmd.run() == 0\n\n    metrics_diff.assert_called_once_with(\n        cmd.repo,\n        targets=[\"target1\", \"target2\"],\n        a_rev=\"HEAD~10\",\n        b_rev=\"HEAD~1\",\n        all=True,\n    )\n    show_diff_mock.assert_called_once_with(\n        diff[\"diff\"],\n        title=\"Metric\",\n        no_path=True,\n        precision=5,\n        markdown=True,\n        round_digits=True,\n        a_rev=\"HEAD~10\",\n        b_rev=\"HEAD~1\",\n    )\n\n\ndef test_metrics_diff_json(dvc, mocker, capsys):\n    cli_args = parse_args(\n        [\n            \"metrics\",\n            \"diff\",\n            \"HEAD~10\",\n            \"HEAD~1\",\n            \"--all\",\n            \"--json\",\n            \"--targets\",\n            \"target1\",\n            \"target2\",\n            \"--no-path\",\n            \"--precision\",\n            \"10\",\n        ]\n    )\n\n    assert cli_args.func == CmdMetricsDiff\n    cmd = cli_args.func(cli_args)\n\n    diff = {\"diff\": {\"metrics.yaml\": {\"\": {\"old\": 1, \"new\": 3}}}}\n    metrics_diff = mocker.patch(\"dvc.repo.metrics.diff.diff\", return_value=diff)\n 
   show_diff_mock = mocker.patch(\"dvc.compare.show_diff\")\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    metrics_diff.assert_called_once_with(\n        cmd.repo,\n        targets=[\"target1\", \"target2\"],\n        a_rev=\"HEAD~10\",\n        b_rev=\"HEAD~1\",\n        all=True,\n    )\n    show_diff_mock.assert_not_called()\n    assert json.dumps(diff[\"diff\"]) in out\n\n\ndef test_metrics_show(dvc, mocker):\n    cli_args = parse_args(\n        [\n            \"metrics\",\n            \"show\",\n            \"--all-tags\",\n            \"--all-branches\",\n            \"--all-commits\",\n            \"target1\",\n            \"target2\",\n            \"--precision\",\n            \"8\",\n        ]\n    )\n    assert cli_args.func == CmdMetricsShow\n\n    cmd = cli_args.func(cli_args)\n    m1 = mocker.patch(\"dvc.repo.metrics.show.show\", return_value={})\n    m2 = mocker.patch(\"dvc.compare.show_metrics\", return_value=\"\")\n\n    assert cmd.run() == 0\n\n    m1.assert_called_once_with(\n        cmd.repo,\n        [\"target1\", \"target2\"],\n        all_tags=True,\n        all_branches=True,\n        all_commits=True,\n    )\n    m2.assert_called_once_with(\n        {},\n        markdown=False,\n        all_tags=True,\n        all_branches=True,\n        all_commits=True,\n        precision=8,\n        round_digits=True,\n    )\n\n\ndef test_metrics_show_json(dvc, mocker, capsys):\n    cli_args = parse_args(\n        [\n            \"metrics\",\n            \"show\",\n            \"--json\",\n            \"--all-tags\",\n            \"--all-branches\",\n            \"--all-commits\",\n            \"target1\",\n            \"target2\",\n            \"--precision\",\n            \"8\",\n        ]\n    )\n\n    assert cli_args.func == CmdMetricsShow\n    cmd = cli_args.func(cli_args)\n    d = {\n        \"branch_1\": {\"metrics.json\": {\"b\": {\"ad\": 1, \"bc\": 2}, \"c\": 4}},\n        \"branch_2\": {\"metrics.json\": {\"a\": 1, \"b\": 
{\"ad\": 3, \"bc\": 4}}},\n    }\n    metrics_show = mocker.patch(\"dvc.repo.metrics.show.show\", return_value=d)\n    show_metrics_mock = mocker.patch(\"dvc.compare.show_metrics\")\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    metrics_show.assert_called_once_with(\n        cmd.repo,\n        [\"target1\", \"target2\"],\n        all_tags=True,\n        all_branches=True,\n        all_commits=True,\n    )\n    show_metrics_mock.assert_not_called()\n    assert json.dumps(d) in out\n"
  },
  {
    "path": "tests/unit/command/test_params.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.params import CmdParamsDiff\n\n\ndef test_params_diff(dvc, mocker):\n    cli_args = parse_args(\n        [\n            \"params\",\n            \"diff\",\n            \"HEAD~10\",\n            \"HEAD~1\",\n            \"--targets\",\n            \"target\",\n            \"--all\",\n            \"--json\",\n            \"--md\",\n            \"--no-path\",\n            \"--deps\",\n        ]\n    )\n    assert cli_args.func == CmdParamsDiff\n\n    cmd = cli_args.func(cli_args)\n    params_diff = mocker.patch(\"dvc.repo.params.diff.diff\", return_value={})\n    show_diff_mock = mocker.patch(\"dvc.compare.show_diff\")\n\n    assert cmd.run() == 0\n\n    params_diff.assert_called_once_with(\n        cmd.repo,\n        a_rev=\"HEAD~10\",\n        b_rev=\"HEAD~1\",\n        targets=[\"target\"],\n        all=True,\n        deps_only=True,\n    )\n    show_diff_mock.assert_not_called()\n\n\ndef test_params_diff_from_cli(dvc, mocker):\n    cli_args = parse_args([\"params\", \"diff\"])\n    assert cli_args.func == CmdParamsDiff\n\n    cmd = cli_args.func(cli_args)\n    params_diff = mocker.patch(\"dvc.repo.params.diff.diff\", return_value={})\n    show_diff_mock = mocker.patch(\"dvc.compare.show_diff\")\n\n    assert cmd.run() == 0\n\n    params_diff.assert_called_once_with(\n        cmd.repo,\n        a_rev=\"HEAD\",\n        b_rev=\"workspace\",\n        all=False,\n        targets=None,\n        deps_only=False,\n    )\n    show_diff_mock.assert_called_once_with(\n        {},\n        title=\"Param\",\n        markdown=False,\n        no_path=False,\n        show_changes=False,\n        a_rev=\"HEAD\",\n        b_rev=\"workspace\",\n    )\n\n\ndef test_params_diff_show_json(dvc, mocker, capsys):\n    cli_args = parse_args([\"params\", \"diff\", \"HEAD~10\", \"HEAD~1\", \"--json\"])\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\n        \"dvc.repo.params.diff.diff\",\n        return_value={\n       
     \"diff\": {\"params.yaml\": {\"a\": \"b\"}},\n            \"errors\": {\"rev\": Exception},\n        },\n    )\n    show_diff_mock = mocker.patch(\"dvc.compare.show_diff\")\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert '{\"params.yaml\": {\"a\": \"b\"}}\\n' in out\n    show_diff_mock.assert_not_called()\n"
  },
  {
    "path": "tests/unit/command/test_plots.py",
    "content": "import json\nimport os\nimport posixpath\nfrom pathlib import Path\n\nimport pytest\nfrom funcy import set_in\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.plots import CmdPlotsDiff, CmdPlotsShow, CmdPlotsTemplates\nfrom dvc.render.match import RendererWithErrors\nfrom dvc.utils.serialize import YAMLFileCorruptedError\n\n\n@pytest.fixture\ndef plots_data():\n    return {\n        \"revision\": {\n            \"sources\": {\n                \"data\": {\n                    \"plot.csv\": {\"data\": [{\"val\": 1}, {\"val\": 2}], \"props\": {}},\n                    \"other.jpg\": {\"data\": b\"content\"},\n                }\n            },\n            \"definitions\": {\"data\": {\"dvc.yaml\": {\"data\": {\"plot.csv\": {}}}}},\n        }\n    }\n\n\ndef test_plots_diff(dvc, mocker, plots_data):\n    cli_args = parse_args(\n        [\n            \"plots\",\n            \"diff\",\n            \"--out\",\n            \"result.extension\",\n            \"-t\",\n            \"template\",\n            \"--targets\",\n            \"datafile\",\n            \"--show-vega\",\n            \"-x\",\n            \"x_field\",\n            \"-y\",\n            \"y_field\",\n            \"--title\",\n            \"my_title\",\n            \"--x-label\",\n            \"x_title\",\n            \"--y-label\",\n            \"y_title\",\n            \"--experiment\",\n            \"HEAD\",\n            \"tag1\",\n            \"tag2\",\n        ]\n    )\n    assert cli_args.func == CmdPlotsDiff\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=plots_data)\n    render_mock = mocker.patch(\"dvc_render.render_html\", return_value=\"html_path\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cmd.repo,\n        targets=[\"datafile\"],\n        revs=[\"HEAD\", \"tag1\", \"tag2\"],\n        props={\n            \"template\": \"template\",\n            \"x\": \"x_field\",\n            \"y\": 
\"y_field\",\n            \"title\": \"my_title\",\n            \"x_label\": \"x_title\",\n            \"y_label\": \"y_title\",\n        },\n        experiment=True,\n    )\n    render_mock.assert_not_called()\n\n\ndef test_plots_show_vega(dvc, mocker, plots_data):\n    cli_args = parse_args(\n        [\n            \"plots\",\n            \"show\",\n            \"-o\",\n            \"result.extension\",\n            \"-t\",\n            \"template\",\n            \"--show-vega\",\n            \"--no-header\",\n            \"datafile\",\n        ]\n    )\n    assert cli_args.func == CmdPlotsShow\n\n    cmd = cli_args.func(cli_args)\n\n    m = mocker.patch(\n        \"dvc.repo.plots.Plots.show\",\n        return_value=plots_data,\n    )\n    render_mock = mocker.patch(\"dvc_render.render_html\", return_value=\"html_path\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        targets=[\"datafile\"],\n        props={\"template\": \"template\", \"header\": False},\n    )\n    render_mock.assert_not_called()\n\n\ndef test_plots_diff_vega(dvc, mocker, capsys, plots_data):\n    cli_args = parse_args(\n        [\n            \"plots\",\n            \"diff\",\n            \"HEAD~10\",\n            \"HEAD~1\",\n            \"--show-vega\",\n            \"--targets\",\n            \"plot.csv\",\n        ]\n    )\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=plots_data)\n    mocker.patch(\n        \"dvc_render.VegaRenderer.get_filled_template\",\n        return_value={\"this\": \"is vega json\"},\n    )\n    render_mock = mocker.patch(\"dvc_render.render_html\")\n    assert cmd.run() == 0\n\n    out, _ = capsys.readouterr()\n\n    assert json.dumps({\"this\": \"is vega json\"}) in out\n    render_mock.assert_not_called()\n\n\n@pytest.mark.parametrize(\"auto_open\", [True, False])\ndef test_plots_diff_open(tmp_dir, dvc, mocker, capsys, plots_data, auto_open):\n    mocked_open = 
mocker.patch(\"webbrowser.open\", return_value=True)\n\n    args = [\"plots\", \"diff\", \"--targets\", \"plots.csv\"]\n\n    if auto_open:\n        with dvc.config.edit() as conf:\n            conf[\"plots\"][\"auto_open\"] = True\n    else:\n        args.append(\"--open\")\n\n    cli_args = parse_args(args)\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=plots_data)\n\n    index_path = tmp_dir / \"dvc_plots\" / \"index.html\"\n    mocker.patch(\"dvc_render.render_html\", return_value=index_path)\n\n    assert cmd.run() == 0\n    mocked_open.assert_called_once_with(index_path.as_uri())\n\n    out, _ = capsys.readouterr()\n    assert index_path.as_uri() in out\n\n\ndef test_plots_diff_open_wsl(tmp_dir, dvc, mocker, plots_data):\n    mocked_open = mocker.patch(\"webbrowser.open\", return_value=True)\n    mocked_uname_result = mocker.MagicMock()\n    mocked_uname_result.release = \"microsoft\"\n    mocker.patch(\"platform.uname\", return_value=mocked_uname_result)\n\n    cli_args = parse_args([\"plots\", \"diff\", \"--targets\", \"plots.csv\", \"--open\"])\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=plots_data)\n\n    index_path = tmp_dir / \"dvc_plots\" / \"index.html\"\n    mocker.patch(\"dvc_render.render_html\", return_value=index_path)\n\n    assert cmd.run() == 0\n    mocked_open.assert_called_once_with(str(Path(\"dvc_plots\") / \"index.html\"))\n\n\ndef test_plots_diff_open_failed(tmp_dir, dvc, mocker, capsys, plots_data):\n    mocked_open = mocker.patch(\"webbrowser.open\", return_value=False)\n    cli_args = parse_args([\"plots\", \"diff\", \"--targets\", \"plots.csv\", \"--open\"])\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=plots_data)\n\n    assert cmd.run() == 1\n    expected_url = tmp_dir / \"dvc_plots\" / \"index.html\"\n    mocked_open.assert_called_once_with(expected_url.as_uri())\n\n    error_message 
= (\n        f\"Failed to open {expected_url.as_uri()}. Please try opening it manually.\"\n    )\n\n    out, err = capsys.readouterr()\n    assert expected_url.as_uri() in out\n    assert error_message in err\n\n\n@pytest.mark.parametrize(\n    \"output, expected_url_path\",\n    [\n        (\n            \"plots file with spaces\",\n            posixpath.join(\"plots%20file%20with%20spaces\", \"index.html\"),\n        ),\n        (\n            os.path.join(\"dir\", \"..\", \"plots\"),\n            posixpath.join(\"plots\", \"index.html\"),\n        ),\n    ],\n    ids=[\"quote\", \"resolve\"],\n)\ndef test_plots_path_is_quoted_and_resolved_properly(\n    tmp_dir, dvc, mocker, capsys, output, expected_url_path, plots_data\n):\n    cli_args = parse_args([\"plots\", \"diff\", \"--targets\", \"datafile\", \"--out\", output])\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=plots_data)\n\n    assert cmd.run() == 0\n    expected_url = posixpath.join(tmp_dir.as_uri(), expected_url_path)\n\n    out, _ = capsys.readouterr()\n    assert expected_url in out\n\n\ndef test_should_pass_template_dir(tmp_dir, dvc, mocker, capsys):\n    cli_args = parse_args(\n        [\n            \"plots\",\n            \"diff\",\n            \"HEAD~1\",\n            \"--json\",\n            \"--targets\",\n            \"plot.csv\",\n        ]\n    )\n    cmd = cli_args.func(cli_args)\n\n    data = mocker.MagicMock()\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=data)\n\n    renderers = mocker.MagicMock()\n    match_renderers = mocker.patch(\n        \"dvc.render.match.match_defs_renderers\", return_value=renderers\n    )\n\n    assert cmd.run() == 0\n\n    match_renderers.assert_called_once_with(\n        data=data,\n        out=\"dvc_plots\",\n        templates_dir=str(tmp_dir / \".dvc/plots\"),\n    )\n\n\n@pytest.mark.parametrize(\"output\", [\"some_out\", os.path.join(\"to\", \"subdir\"), None])\ndef 
test_should_call_render(tmp_dir, mocker, capsys, plots_data, output):\n    cli_args = parse_args([\"plots\", \"diff\", \"--targets\", \"plots.csv\", \"--out\", output])\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=plots_data)\n\n    output = output or \"dvc_plots\"\n    index_path = tmp_dir / output / \"index.html\"\n    renderer = mocker.MagicMock()\n    mocker.patch(\n        \"dvc.render.match.match_defs_renderers\",\n        return_value=[RendererWithErrors(renderer, {}, {})],\n    )\n    render_mock = mocker.patch(\"dvc_render.render_html\", return_value=index_path)\n\n    assert cmd.run() == 0\n\n    out, _ = capsys.readouterr()\n    assert index_path.as_uri() in out\n\n    render_mock.assert_called_once_with(\n        renderers=[renderer],\n        output_file=Path(tmp_dir / output / \"index.html\"),\n        html_template=None,\n    )\n\n\ndef test_plots_diff_json(dvc, mocker, capsys):\n    cli_args = parse_args(\n        [\n            \"plots\",\n            \"diff\",\n            \"HEAD~10\",\n            \"HEAD~1\",\n            \"--json\",\n            \"--split\",\n            \"--targets\",\n            \"plot.csv\",\n            \"-o\",\n            \"out\",\n        ]\n    )\n    cmd = cli_args.func(cli_args)\n\n    data = mocker.MagicMock()\n    mocker.patch(\"dvc.repo.plots.diff.diff\", return_value=data)\n\n    renderers = mocker.MagicMock()\n    mocker.patch(\"dvc.render.match.match_defs_renderers\", return_value=renderers)\n    render_mock = mocker.patch(\"dvc_render.render_html\")\n\n    show_json_mock = mocker.patch(\"dvc.commands.plots._show_json\")\n\n    assert cmd.run() == 0\n\n    show_json_mock.assert_called_once_with(renderers, True, errors={})\n\n    render_mock.assert_not_called()\n\n\n@pytest.mark.parametrize(\n    \"target,expected_out,expected_rtn\",\n    [(\"t1\", \"\\\"{'t1'}\\\"\", 0), (None, \"t1\\nt2\", 0), (\"t3\", \"\", 1)],\n)\ndef test_plots_templates(dvc, mocker, 
capsys, target, expected_out, expected_rtn):\n    t1 = mocker.Mock()\n    t1.DEFAULT_NAME = \"t1\"\n    t1.DEFAULT_CONTENT = \"{'t1'}\"\n\n    t2 = mocker.Mock()\n    t2.DEFAULT_NAME = \"t2\"\n    t2.DEFAULT_CONTENT = \"{'t2'}\"\n\n    mocker.patch(\"dvc_render.vega_templates.TEMPLATES\", [t1, t2])\n\n    arguments = [\"plots\", \"templates\"]\n    if target:\n        arguments += [target]\n\n    cli_args = parse_args(arguments)\n    assert cli_args.func == CmdPlotsTemplates\n\n    cmd = cli_args.func(cli_args)\n\n    rtn = cmd.run()\n\n    out, _ = capsys.readouterr()\n\n    assert out.strip() == expected_out\n    assert rtn == expected_rtn\n\n\n@pytest.mark.parametrize(\"split\", [True, False])\ndef test_show_json(split, mocker, capsys):\n    import dvc.commands.plots\n\n    renderer = mocker.MagicMock()\n    renderer_obj = RendererWithErrors(renderer, {}, {})\n    renderer.name = \"rname\"\n    to_json_mock = mocker.patch(\n        \"dvc.render.convert.to_json\", return_value={\"renderer\": \"json\"}\n    )\n\n    dvc.commands.plots._show_json([renderer_obj], split)\n\n    to_json_mock.assert_called_once_with(renderer, split)\n\n    out, _ = capsys.readouterr()\n    assert json.dumps({\"rname\": {\"renderer\": \"json\"}}) in out\n\n\ndef test_show_json_no_renderers(capsys):\n    import dvc.commands.plots\n\n    dvc.commands.plots._show_json([])\n\n    out, _ = capsys.readouterr()\n    assert json.dumps({}) in out\n\n\ndef test_show_json_with_error(dvc, mocker, capsys):\n    cli_args = parse_args([\"plots\", \"show\", \"--json\"])\n    cmd = cli_args.func(cli_args)\n\n    e = YAMLFileCorruptedError(\"dvc.yaml\")\n    data = set_in({}, [\"workspace\", \"definitions\", \"error\"], e)\n    cmd._func = mocker.MagicMock(return_value=data)\n\n    cmd.run()\n    out, _ = capsys.readouterr()\n    assert json.loads(out) == {\n        \"errors\": [\n            {\n                \"rev\": \"workspace\",\n                \"type\": type(e).__name__,\n                \"msg\": 
e.args[0],\n            }\n        ]\n    }\n"
  },
  {
    "path": "tests/unit/command/test_queue.py",
    "content": "import pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.queue.kill import CmdQueueKill\nfrom dvc.commands.queue.logs import CmdQueueLogs\nfrom dvc.commands.queue.remove import CmdQueueRemove\nfrom dvc.commands.queue.start import CmdQueueStart\nfrom dvc.commands.queue.status import CmdQueueStatus\nfrom dvc.commands.queue.stop import CmdQueueStop\nfrom dvc.exceptions import InvalidArgumentError\n\n\ndef test_experiments_remove_flags(dvc, scm, mocker):\n    cli_args = parse_args([\"queue\", \"remove\", \"--queued\", \"--success\", \"--failed\"])\n    assert cli_args.func == CmdQueueRemove\n    cmd = cli_args.func(cli_args)\n    remove_mocker = mocker.patch(\n        \"dvc.repo.experiments.queue.celery.LocalCeleryQueue.clear\", return_value={}\n    )\n    assert cmd.run() == 0\n    remove_mocker.assert_called_once_with(success=True, failed=True, queued=True)\n    cli_args = parse_args([\"queue\", \"remove\", \"--all\"])\n    cmd = cli_args.func(cli_args)\n    remove_mocker.reset_mock()\n    assert cmd.run() == 0\n    remove_mocker.assert_called_once_with(success=True, failed=True, queued=True)\n\n\ndef test_experiments_remove_invalid(dvc, scm, mocker):\n    cli_args = parse_args([\"queue\", \"remove\", \"--queued\", [\"exp1\", \"exp2\"]])\n    cmd = cli_args.func(cli_args)\n    with pytest.raises(InvalidArgumentError):\n        assert cmd.run() == 0\n\n    cli_args = parse_args(\n        [\n            \"queue\",\n            \"remove\",\n        ]\n    )\n    cmd = cli_args.func(cli_args)\n    with pytest.raises(InvalidArgumentError):\n        assert cmd.run() == 0\n\n\ndef test_experiments_remove_name(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"queue\",\n            \"remove\",\n            \"exp1\",\n            \"exp2\",\n        ]\n    )\n    assert cli_args.func == CmdQueueRemove\n    cmd = cli_args.func(cli_args)\n    remove_mocker = mocker.patch(\n        
\"dvc.repo.experiments.queue.celery.LocalCeleryQueue.remove\",\n        return_value={},\n    )\n    assert cmd.run() == 0\n    remove_mocker.assert_called_once_with(\n        revs=[\"exp1\", \"exp2\"],\n    )\n\n\ndef test_experiments_kill(dvc, scm, mocker):\n    cli_args = parse_args(\n        [\n            \"queue\",\n            \"kill\",\n            \"--force\",\n            \"exp1\",\n            \"exp2\",\n        ]\n    )\n    assert cli_args.func == CmdQueueKill\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\n        \"dvc.repo.experiments.queue.celery.LocalCeleryQueue.kill\",\n        return_value={},\n    )\n\n    assert cmd.run() == 0\n    m.assert_called_once_with(revs=[\"exp1\", \"exp2\"], force=True)\n\n\ndef test_experiments_start(dvc, scm, mocker):\n    cli_args = parse_args([\"queue\", \"start\", \"-j\", \"3\"])\n    assert cli_args.func == CmdQueueStart\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.queue.celery.LocalCeleryQueue._spawn_worker\")\n\n    assert cmd.run() == 0\n    assert m.call_count == 3\n\n\ndef test_experiments_stop(dvc, scm, mocker):\n    cli_args = parse_args([\"queue\", \"stop\", \"--kill\"])\n    assert cli_args.func == CmdQueueStop\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.experiments.queue.celery.LocalCeleryQueue.shutdown\")\n\n    assert cmd.run() == 0\n    m.assert_called_once_with(kill=True)\n\n\n@pytest.mark.parametrize(\n    \"worker_status, output\",\n    [\n        (\n            {\"worker1\": [], \"worker2\": []},\n            \"Worker status: 0 active, 2 idle\",\n        ),\n        (\n            {\n                \"worker1\": [{\"id\": \"1\"}],\n                \"worker2\": [{\"id\": \"2\"}],\n                \"worker3\": [],\n            },\n            \"Worker status: 2 active, 1 idle\",\n        ),\n        (\n            {\"worker1\": [{\"id\": \"1\"}]},\n            \"Worker status: 1 active, 0 idle\",\n        ),\n    
],\n)\ndef test_worker_status(dvc, scm, worker_status, output, mocker, capsys):\n    cli_args = parse_args(\n        [\n            \"queue\",\n            \"status\",\n        ]\n    )\n    assert cli_args.func == CmdQueueStatus\n\n    cmd = cli_args.func(cli_args)\n    mocker.patch(\n        \"dvc.repo.experiments.queue.celery.LocalCeleryQueue.status\",\n        return_value=[],\n    )\n    m = mocker.patch(\n        \"dvc.repo.experiments.queue.celery.LocalCeleryQueue.worker_status\",\n        return_value=worker_status,\n    )\n\n    assert cmd.run() == 0\n    m.assert_called_once_with()\n    log, _ = capsys.readouterr()\n    assert \"No experiment tasks in the queue.\" in log\n    assert output in log\n\n\ndef test_experiments_status(dvc, scm, mocker, capsys):\n    from datetime import datetime\n\n    cli_args = parse_args([\"queue\", \"status\"])\n    assert cli_args.func == CmdQueueStatus\n\n    cmd = cli_args.func(cli_args)\n    status_result = [\n        {\n            \"rev\": \"c61a525a4ff39007301b4516fb6e54b323a0587b\",\n            \"name\": \"I40\",\n            \"timestamp\": datetime(2022, 6, 9, 20, 49, 48),  # noqa: DTZ001\n            \"status\": \"Queued\",\n        },\n        {\n            \"rev\": \"8da9c339da30636261a3491a90aafdb760a4168f\",\n            \"name\": \"I60\",\n            \"timestamp\": datetime(2022, 6, 9, 20, 49, 43),  # noqa: DTZ001\n            \"status\": \"Running\",\n        },\n    ]\n    m = mocker.patch(\n        \"dvc.repo.experiments.queue.celery.LocalCeleryQueue.status\",\n        return_value=status_result,\n    )\n\n    assert cmd.run() == 0\n    m.assert_called_once_with()\n    log, _ = capsys.readouterr()\n    assert \"Task     Name    Created       Status\" in log\n    assert \"c61a525  I40     Jun 09, 2022  Queued\" in log\n    assert \"8da9c33  I60     Jun 09, 2022  Running\" in log\n\n\ndef test_queue_logs(dvc, scm, mocker):\n    cli_args = parse_args([\"queue\", \"logs\", \"exp1\", \"-e\", \"utf8\", 
\"-f\"])\n    assert cli_args.func == CmdQueueLogs\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\n        \"dvc.repo.experiments.queue.celery.LocalCeleryQueue.logs\",\n        return_value={},\n    )\n\n    assert cmd.run() == 0\n    m.assert_called_once_with(rev=\"exp1\", encoding=\"utf8\", follow=True)\n"
  },
  {
    "path": "tests/unit/command/test_repro.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.repro import CmdRepro\n\ncommon_arguments = {\n    \"all_pipelines\": False,\n    \"downstream\": False,\n    \"dry\": False,\n    \"force\": False,\n    \"interactive\": False,\n    \"pipeline\": False,\n    \"single_item\": False,\n    \"recursive\": False,\n    \"force_downstream\": False,\n    \"pull\": False,\n    \"allow_missing\": False,\n    \"targets\": [],\n    \"on_error\": \"fail\",\n}\nrepro_arguments = {\n    \"run_cache\": True,\n    \"no_commit\": False,\n    \"glob\": False,\n}\n\n\ndef test_default_arguments(dvc, mocker):\n    cmd = CmdRepro(parse_args([\"repro\"]))\n    mocker.patch.object(cmd.repo, \"reproduce\")\n    cmd.run()\n    cmd.repo.reproduce.assert_called_with(**common_arguments, **repro_arguments)\n\n\ndef test_downstream(dvc, mocker):\n    cmd = CmdRepro(parse_args([\"repro\", \"--downstream\"]))\n    mocker.patch.object(cmd.repo, \"reproduce\")\n    cmd.run()\n    arguments = common_arguments.copy()\n    arguments.update(repro_arguments)\n    arguments.update({\"downstream\": True})\n    cmd.repo.reproduce.assert_called_with(**arguments)\n"
  },
  {
    "path": "tests/unit/command/test_stage.py",
    "content": "import pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.stage import CmdStageAdd\nfrom tests.utils.asserts import called_once_with_subset\n\n\n@pytest.mark.parametrize(\n    \"command, parsed_command\",\n    [\n        ([\"echo\", \"foo\", \"bar\"], \"echo foo bar\"),\n        ([\"echo\", '\"foo bar\"'], 'echo \"foo bar\"'),\n        ([\"echo\", \"foo bar\"], 'echo \"foo bar\"'),\n        ([\"cmd\", \"--flag\", \"\"], 'cmd --flag \"\"'),\n    ],\n)\ndef test_stage_add(mocker, dvc, command, parsed_command):\n    cli_args = parse_args(\n        [\n            \"stage\",\n            \"add\",\n            \"--name\",\n            \"name\",\n            \"--deps\",\n            \"deps\",\n            \"--outs\",\n            \"outs\",\n            \"--outs-no-cache\",\n            \"outs-no-cache\",\n            \"--metrics\",\n            \"metrics\",\n            \"--metrics-no-cache\",\n            \"metrics-no-cache\",\n            \"--plots\",\n            \"plots\",\n            \"--plots-no-cache\",\n            \"plots-no-cache\",\n            \"--wdir\",\n            \"wdir\",\n            \"--force\",\n            \"--outs-persist\",\n            \"outs-persist\",\n            \"--outs-persist-no-cache\",\n            \"outs-persist-no-cache\",\n            \"--always-changed\",\n            \"--params\",\n            \"file:param1,param2\",\n            \"--params\",\n            \"param3\",\n            \"--desc\",\n            \"description\",\n            \"--force\",\n            *command,\n        ]\n    )\n    assert cli_args.func == CmdStageAdd\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo.stage, \"add\")\n\n    assert cmd.run() == 0\n    assert called_once_with_subset(\n        m,\n        name=\"name\",\n        deps=[\"deps\"],\n        outs=[\"outs\"],\n        outs_no_cache=[\"outs-no-cache\"],\n        params=[\n            {\"file\": [\"param1\", \"param2\"]},\n            
{\"params.yaml\": [\"param3\"]},\n        ],\n        metrics=[\"metrics\"],\n        metrics_no_cache=[\"metrics-no-cache\"],\n        plots=[\"plots\"],\n        plots_no_cache=[\"plots-no-cache\"],\n        wdir=\"wdir\",\n        outs_persist=[\"outs-persist\"],\n        outs_persist_no_cache=[\"outs-persist-no-cache\"],\n        always_changed=True,\n        external=True,\n        desc=\"description\",\n        cmd=parsed_command,\n        force=True,\n    )\n\n\ndef test_stage_add_and_run(mocker, dvc):\n    cli_args = parse_args([\"stage\", \"add\", \"--run\", \"-n\", \"foo\", \"-o\", \"foo\", \"cmd\"])\n    cmd = cli_args.func(cli_args)\n    add_mock = mocker.patch.object(cmd.repo.stage, \"add\")\n\n    assert cmd.run() == 0\n\n    assert called_once_with_subset(add_mock, name=\"foo\", outs=[\"foo\"], cmd=\"cmd\")\n    add_mock.return_value.run.assert_called_once()\n    add_mock.return_value.dump.assert_called_once_with(update_pipeline=False)\n"
  },
  {
    "path": "tests/unit/command/test_status.py",
    "content": "import json\n\nimport pytest\n\nfrom dvc.cli import parse_args\nfrom dvc.commands.status import CmdDataStatus\n\n\ndef test_cloud_status(tmp_dir, dvc, mocker):\n    cli_args = parse_args(\n        [\n            \"status\",\n            \"--cloud\",\n            \"target1\",\n            \"target2\",\n            \"--jobs\",\n            \"2\",\n            \"--remote\",\n            \"remote\",\n            \"--all-branches\",\n            \"--all-tags\",\n            \"--all-commits\",\n            \"--with-deps\",\n            \"--recursive\",\n        ]\n    )\n    assert cli_args.func == CmdDataStatus\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"status\", autospec=True, return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cloud=True,\n        targets=[\"target1\", \"target2\"],\n        jobs=2,\n        remote=\"remote\",\n        all_branches=True,\n        all_tags=True,\n        all_commits=True,\n        with_deps=True,\n        recursive=True,\n        check_updates=True,\n    )\n\n\n@pytest.mark.parametrize(\"status\", [{}, {\"a\": \"b\", \"c\": [1, 2, 3]}, [1, 2, 3]])\ndef test_status_show_json(dvc, mocker, capsys, status):\n    cli_args = parse_args([\"status\", \"--json\"])\n    assert cli_args.func == CmdDataStatus\n\n    cmd = cli_args.func(cli_args)\n\n    mocker.patch.object(cmd.repo, \"status\", autospec=True, return_value=status)\n\n    assert cmd.run() == 0\n    out, _ = capsys.readouterr()\n    assert json.dumps(status) in out\n\n\n@pytest.mark.parametrize(\n    \"status, ret\", [({}, 0), ({\"a\": \"b\", \"c\": [1, 2, 3]}, 1), ([1, 2, 3], 1)]\n)\ndef test_status_quiet(dvc, mocker, caplog, capsys, status, ret):\n    cli_args = parse_args([\"status\", \"-q\"])\n    assert cli_args.func == CmdDataStatus\n\n    cmd = cli_args.func(cli_args)\n\n    mocker.patch.object(cmd.repo, \"status\", autospec=True, return_value=status)\n    caplog.clear()\n    assert 
cmd.run() == ret\n    assert not caplog.messages\n    captured = capsys.readouterr()\n    assert not captured.out\n\n\ndef test_status_empty(dvc, mocker, capsys):\n    from dvc.repo.index import Index\n\n    cli_args = parse_args([\"status\"])\n    assert cli_args.func == CmdDataStatus\n\n    cmd = cli_args.func(cli_args)\n\n    spy = mocker.spy(Index, \"from_repo\")\n\n    assert cmd.run() == 0\n\n    captured = capsys.readouterr()\n    assert \"no data or pipelines tracked\" in captured.out\n    # stages should only be collected once\n    assert spy.call_count == 1\n\n\n@pytest.mark.parametrize(\n    \"cloud_opts, expected_message\",\n    [\n        ([\"--cloud\"], \"Cache and remote 'default' are in sync\"),\n        ([\"--remote\", \"remote1\"], \"Cache and remote 'remote1' are in sync\"),\n        ([], \"Data and pipelines are up to date\"),\n    ],\n)\ndef test_status_up_to_date(dvc, mocker, capsys, cloud_opts, expected_message):\n    from dvc.repo.index import Index\n\n    cli_args = parse_args([\"status\", *cloud_opts])\n    assert cli_args.func == CmdDataStatus\n\n    cmd = cli_args.func(cli_args)\n\n    mocker.patch.dict(cmd.repo.config, {\"core\": {\"remote\": \"default\"}})\n    mocker.patch.object(cmd.repo, \"status\", autospec=True, return_value={})\n    mocker.patch(\"dvc.repo.Repo.index\", return_value=Index(dvc, [object()]))\n    cmd.repo._reset = mocker.Mock()\n\n    assert cmd.run() == 0\n    captured = capsys.readouterr()\n    assert expected_message in captured.out\n\n\ndef test_status_check_updates(dvc, mocker, capsys):\n    cli_args = parse_args([\"status\", \"--no-updates\"])\n    assert cli_args.func == CmdDataStatus\n\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch.object(cmd.repo, \"status\", autospec=True, return_value={})\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        cloud=False,\n        targets=[],\n        jobs=None,\n        remote=None,\n        all_branches=False,\n        all_tags=False,\n       
 all_commits=False,\n        with_deps=False,\n        recursive=False,\n        check_updates=False,\n    )\n"
  },
  {
    "path": "tests/unit/command/test_studio.py",
    "content": "import pytest\n\nfrom dvc import env\nfrom dvc.cli import main\nfrom dvc.utils.studio import STUDIO_URL\nfrom dvc_studio_client.auth import AuthorizationExpiredError\n\n\n@pytest.fixture(autouse=True)\ndef global_config_dir(monkeypatch, tmp_path_factory):\n    monkeypatch.setenv(\n        env.DVC_GLOBAL_CONFIG_DIR, str(tmp_path_factory.mktemp(\"studio-login\"))\n    )\n\n\ndef test_studio_login_token_check_failed(mocker):\n    mocker.patch(\n        \"dvc_studio_client.auth.get_access_token\",\n        side_effect=AuthorizationExpiredError,\n    )\n    assert main([\"studio\", \"login\"]) == 1\n\n\ndef test_studio_login_success(mocker, dvc):\n    mocker.patch(\n        \"dvc_studio_client.auth.get_access_token\",\n        return_value=(\"token_name\", \"isat_access_token\"),\n    )\n\n    assert main([\"studio\", \"login\"]) == 0\n\n    config = dvc.config.load_one(\"global\")\n    assert config[\"studio\"][\"token\"] == \"isat_access_token\"\n    assert config[\"studio\"][\"url\"] == STUDIO_URL\n\n\ndef test_studio_login_arguments(mocker):\n    mock = mocker.patch(\n        \"dvc_studio_client.auth.get_access_token\",\n        return_value=(\"token_name\", \"isat_access_token\"),\n    )\n\n    assert (\n        main(\n            [\n                \"studio\",\n                \"login\",\n                \"--name\",\n                \"token_name\",\n                \"--hostname\",\n                \"https://example.com\",\n                \"--scopes\",\n                \"experiments\",\n                \"--no-open\",\n            ]\n        )\n        == 0\n    )\n\n    mock.assert_called_with(\n        token_name=\"token_name\",\n        hostname=\"https://example.com\",\n        scopes=\"experiments\",\n        client_name=\"DVC\",\n        open_browser=False,\n    )\n\n\ndef test_studio_logout(dvc):\n    with dvc.config.edit(\"global\") as conf:\n        conf[\"studio\"][\"token\"] = \"isat_access_token\"\n\n    assert main([\"studio\", 
\"logout\"]) == 0\n    config = dvc.config.load_one(\"global\")\n    assert \"token\" not in config[\"studio\"]\n\n    assert main([\"studio\", \"logout\"]) == 1\n\n\ndef test_studio_token(dvc, capsys):\n    with dvc.config.edit(\"global\") as conf:\n        conf[\"studio\"][\"token\"] = \"isat_access_token\"\n\n    assert main([\"studio\", \"token\"]) == 0\n    assert capsys.readouterr().out == \"isat_access_token\\n\"\n\n    with dvc.config.edit(\"global\") as conf:\n        del conf[\"studio\"][\"token\"]\n\n    assert main([\"studio\", \"token\"]) == 1\n"
  },
  {
    "path": "tests/unit/command/test_update.py",
    "content": "from dvc.cli import parse_args\nfrom dvc.commands.update import CmdUpdate\n\n\ndef test_update(dvc, mocker):\n    cli_args = parse_args(\n        [\n            \"update\",\n            \"target1\",\n            \"target2\",\n            \"--rev\",\n            \"REV\",\n            \"--recursive\",\n            \"-j\",\n            \"8\",\n        ]\n    )\n    assert cli_args.func == CmdUpdate\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.update\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        targets=[\"target1\", \"target2\"],\n        rev=\"REV\",\n        recursive=True,\n        to_remote=False,\n        no_download=False,\n        remote=None,\n        jobs=8,\n    )\n\n\ndef test_update_to_remote(dvc, mocker):\n    cli_args = parse_args(\n        [\n            \"update\",\n            \"target1\",\n            \"target2\",\n            \"--to-remote\",\n            \"-j\",\n            \"5\",\n            \"-r\",\n            \"remote\",\n            \"--recursive\",\n        ]\n    )\n    assert cli_args.func == CmdUpdate\n    cmd = cli_args.func(cli_args)\n    m = mocker.patch(\"dvc.repo.Repo.update\")\n\n    assert cmd.run() == 0\n\n    m.assert_called_once_with(\n        targets=[\"target1\", \"target2\"],\n        rev=None,\n        recursive=True,\n        to_remote=True,\n        no_download=False,\n        remote=\"remote\",\n        jobs=5,\n    )\n"
  },
  {
    "path": "tests/unit/data/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/data/db/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/data/db/test_local.py",
    "content": "import errno\nimport os\n\nimport pytest\n\nfrom dvc.fs import LocalFileSystem\nfrom dvc_data.hashfile.db.local import LocalHashFileDB\nfrom dvc_data.hashfile.hash_info import HashInfo\n\n\ndef test_status_download_optimization(mocker, dvc):\n    \"\"\"When comparing the status to pull a remote cache,\n    And the desired files to fetch are already on the local cache,\n    Don't check the existence of the desired files on the remote cache\n    \"\"\"\n    from dvc_data.hashfile.status import compare_status\n\n    odb = LocalHashFileDB(LocalFileSystem(), os.getcwd())\n    obj_ids = {\n        HashInfo(\"md5\", \"acbd18db4cc2f85cedef654fccc4a4d8\"),\n        HashInfo(\"md5\", \"37b51d194a7513e45b56f6524f2d51f2\"),\n    }\n\n    local_exists = [hash_info.value for hash_info in obj_ids]\n    mocker.patch.object(odb, \"oids_exist\", return_value=local_exists)\n\n    src_odb = mocker.Mock()\n\n    compare_status(src_odb, odb, obj_ids, check_deleted=False)\n    assert src_odb.oids_exist.call_count == 0\n\n\n@pytest.mark.parametrize(\"link_name\", [\"hardlink\", \"symlink\"])\ndef test_is_protected(tmp_dir, dvc, link_name):\n    odb = dvc.cache.local\n    fs = odb.fs\n    link_method = getattr(fs, link_name)\n\n    (tmp_dir / \"foo\").write_text(\"foo\")\n\n    foo = tmp_dir / \"foo\"\n    link = tmp_dir / \"link\"\n\n    link_method(foo, link)\n\n    assert not odb.is_protected(foo)\n    assert not odb.is_protected(link)\n\n    odb.protect(foo)\n\n    assert odb.is_protected(foo)\n    assert odb.is_protected(link)\n\n    odb.unprotect(link)\n\n    assert not odb.is_protected(link)\n    if os.name == \"nt\" and link_name == \"hardlink\":\n        # NOTE: NTFS doesn't allow deleting read-only files, which forces us to\n        # set write perms on the link, which propagates to the source.\n        assert not odb.is_protected(foo)\n    else:\n        assert odb.is_protected(foo)\n\n\n@pytest.mark.parametrize(\"err\", [errno.EPERM, errno.EACCES, 
errno.EROFS])\ndef test_protect_ignore_errors(tmp_dir, dvc, mocker, err):\n    tmp_dir.gen(\"foo\", \"foo\")\n\n    mock_chmod = mocker.patch(\"os.chmod\", side_effect=OSError(err, \"something\"))\n    dvc.cache.local.protect(\"foo\")\n    assert mock_chmod.called\n\n\n@pytest.mark.parametrize(\"err\", [errno.EPERM, errno.EACCES, errno.EROFS])\ndef test_set_exec_ignore_errors(tmp_dir, dvc, mocker, err):\n    tmp_dir.gen(\"foo\", \"foo\")\n\n    mock_chmod = mocker.patch(\"os.chmod\", side_effect=OSError(err, \"something\"))\n    dvc.cache.local.set_exec(\"foo\")\n    assert mock_chmod.called\n\n\ndef test_staging_file(tmp_dir, dvc):\n    from dvc_data.hashfile import check\n    from dvc_data.hashfile.build import build\n    from dvc_data.hashfile.transfer import transfer\n\n    tmp_dir.gen(\"foo\", \"foo\")\n    fs = LocalFileSystem()\n\n    local_odb = dvc.cache.local\n    staging_odb, _, obj = build(local_odb, (tmp_dir / \"foo\").fs_path, fs, \"md5\")\n\n    assert not local_odb.exists(obj.hash_info.value)\n    assert staging_odb.exists(obj.hash_info.value)\n\n    with pytest.raises(FileNotFoundError):\n        check(local_odb, obj)\n    check(staging_odb, obj)\n\n    transfer(staging_odb, local_odb, {obj.hash_info}, hardlink=True)\n    check(local_odb, obj)\n    check(staging_odb, obj)\n\n    path = local_odb.oid_to_path(obj.hash_info.value)\n    assert fs.exists(path)\n\n\ndef test_staging_dir(tmp_dir, dvc):\n    from dvc_data.hashfile import check\n    from dvc_data.hashfile.build import build\n    from dvc_data.hashfile.transfer import transfer\n\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    fs = LocalFileSystem()\n    local_odb = dvc.cache.local\n\n    staging_odb, _, obj = build(local_odb, (tmp_dir / \"dir\").fs_path, fs, \"md5\")\n\n    assert not local_odb.exists(obj.hash_info.value)\n    assert staging_odb.exists(obj.hash_info.value)\n\n    with pytest.raises(FileNotFoundError):\n        check(local_odb, obj)\n    
check(staging_odb, obj)\n\n    transfer(staging_odb, local_odb, {obj.hash_info}, shallow=False, hardlink=True)\n    check(local_odb, obj)\n    check(staging_odb, obj)\n\n    path = local_odb.oid_to_path(obj.hash_info.value)\n    assert fs.exists(path)\n"
  },
  {
    "path": "tests/unit/dependency/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/dependency/test_dependency.py",
    "content": "import pytest\n\nfrom dvc.dependency import Dependency\nfrom dvc.stage import Stage\n\n\ndef test_save_missing(dvc, mocker):\n    stage = Stage(dvc)\n    dep = Dependency(stage, \"path\")\n    mocker.patch.object(dep.fs, \"exists\", return_value=False)\n    with pytest.raises(dep.DoesNotExistError):\n        dep.save()\n"
  },
  {
    "path": "tests/unit/dependency/test_params.py",
    "content": "import pytest\n\nfrom dvc.dependency import ParamsDependency, loadd_from, loads_params\nfrom dvc.dependency.param import BadParamFileError, MissingParamsError\nfrom dvc.stage import Stage\nfrom dvc.utils.serialize import dump_toml, dump_yaml, load_yaml\n\nPARAMS = {\"foo\": 1, \"bar\": 53.135, \"baz\": \"str\", \"qux\": None}\nDEFAULT_PARAMS_FILE = ParamsDependency.DEFAULT_PARAMS_FILE\n\n\ndef test_loads_params(dvc):\n    stage = Stage(dvc)\n    deps = loads_params(\n        stage,\n        [\n            \"foo\",\n            \"bar\",\n            {\"a_file\": [\"baz\", \"bat\"]},\n            {\"b_file\": [\"cat\"]},\n            {},\n            {\"a_file\": [\"foobar\"]},\n        ],\n    )\n    assert len(deps) == 3\n\n    assert isinstance(deps[0], ParamsDependency)\n    assert deps[0].def_path == ParamsDependency.DEFAULT_PARAMS_FILE\n    assert deps[0].params == [\"foo\", \"bar\"]\n    assert not deps[0].hash_info\n\n    assert isinstance(deps[1], ParamsDependency)\n    assert deps[1].def_path == \"a_file\"\n    assert deps[1].params == [\"baz\", \"bat\", \"foobar\"]\n    assert not deps[1].hash_info\n\n    assert isinstance(deps[2], ParamsDependency)\n    assert deps[2].def_path == \"b_file\"\n    assert deps[2].params == [\"cat\"]\n    assert not deps[2].hash_info\n\n\ndef test_loads_params_without_any_specific_targets(dvc):\n    stage = Stage(dvc)\n    deps = loads_params(\n        stage,\n        [\n            \"foo\",\n            {\"params.yaml\": None},\n            {\"a_file\": []},\n            {\"b_file\": [\"baz\"]},\n            {\"b_file\": [\"bat\"]},\n            {\"a_file\": [\"foobar\"]},\n        ],\n    )\n    assert len(deps) == 3\n\n    assert isinstance(deps[0], ParamsDependency)\n    assert deps[0].def_path == ParamsDependency.DEFAULT_PARAMS_FILE\n    assert deps[0].params == []\n    assert not deps[0].hash_info\n\n    assert isinstance(deps[1], ParamsDependency)\n    assert deps[1].def_path == \"a_file\"\n    assert 
deps[1].params == []\n    assert not deps[1].hash_info\n\n    assert isinstance(deps[2], ParamsDependency)\n    assert deps[2].def_path == \"b_file\"\n    assert deps[2].params == [\"baz\", \"bat\"]\n    assert not deps[2].hash_info\n\n\n@pytest.mark.parametrize(\n    \"params, errmsg\",\n    [\n        ([3], \"Only list of str/dict is supported. Got: 'int'\"),\n        (\n            [{\"b_file\": \"cat\"}],\n            \"Expected list of params for custom params file 'b_file', got 'str'.\",\n        ),\n    ],\n)\ndef test_params_error(dvc, params, errmsg):\n    with pytest.raises(ValueError, match=errmsg):\n        loads_params(Stage(dvc), params)\n\n\ndef test_loadd_from(dvc):\n    stage = Stage(dvc)\n    deps = loadd_from(stage, [{\"params\": PARAMS}])\n    assert len(deps) == 1\n    assert isinstance(deps[0], ParamsDependency)\n    assert deps[0].def_path == ParamsDependency.DEFAULT_PARAMS_FILE\n    assert deps[0].params == list(PARAMS.keys())\n    assert deps[0].hash_info.value == PARAMS\n\n\ndef test_dumpd_with_info(dvc):\n    dep = ParamsDependency(Stage(dvc), None, PARAMS)\n    assert dep.dumpd() == {\"path\": DEFAULT_PARAMS_FILE, \"params\": PARAMS}\n\n\ndef test_dumpd_without_info(dvc):\n    dep = ParamsDependency(Stage(dvc), None, list(PARAMS.keys()))\n    assert dep.dumpd() == {\"path\": DEFAULT_PARAMS_FILE, \"params\": list(PARAMS.keys())}\n\n\ndef test_read_params_nonexistent_file(dvc):\n    dep = ParamsDependency(Stage(dvc), None, [\"foo\"])\n    assert dep.read_params() == {}\n\n\ndef test_read_params_unsupported_format(tmp_dir, dvc):\n    tmp_dir.gen(DEFAULT_PARAMS_FILE, b\"\\0\\1\\2\\3\\4\\5\\6\\7\")\n    dep = ParamsDependency(Stage(dvc), None, [\"foo\"])\n    with pytest.raises(BadParamFileError):\n        dep.read_params()\n\n\ndef test_read_params_nested(tmp_dir, dvc):\n    dump_yaml(DEFAULT_PARAMS_FILE, {\"some\": {\"path\": {\"foo\": [\"val1\", \"val2\"]}}})\n    dep = ParamsDependency(Stage(dvc), None, [\"some.path.foo\"])\n    assert 
dep.read_params() == {\"some.path.foo\": [\"val1\", \"val2\"]}\n\n\ndef test_read_params_default_loader(tmp_dir, dvc):\n    parameters_file = \"parameters.foo\"\n    dump_yaml(parameters_file, {\"some\": {\"path\": {\"foo\": [\"val1\", \"val2\"]}}})\n    dep = ParamsDependency(Stage(dvc), parameters_file, [\"some.path.foo\"])\n    assert dep.read_params() == {\"some.path.foo\": [\"val1\", \"val2\"]}\n\n\ndef test_read_params_wrong_suffix(tmp_dir, dvc):\n    parameters_file = \"parameters.toml\"\n    dump_yaml(parameters_file, {\"some\": {\"path\": {\"foo\": [\"val1\", \"val2\"]}}})\n    dep = ParamsDependency(Stage(dvc), parameters_file, [\"some.path.foo\"])\n    with pytest.raises(BadParamFileError):\n        dep.read_params()\n\n\ndef test_read_params_toml(tmp_dir, dvc):\n    parameters_file = \"parameters.toml\"\n    dump_toml(parameters_file, {\"some\": {\"path\": {\"foo\": [\"val1\", \"val2\"]}}})\n    dep = ParamsDependency(Stage(dvc), parameters_file, [\"some.path.foo\"])\n    assert dep.read_params() == {\"some.path.foo\": [\"val1\", \"val2\"]}\n\n\ndef test_read_params_py(tmp_dir, dvc):\n    parameters_file = \"parameters.py\"\n    tmp_dir.gen(\n        parameters_file,\n        (\n            \"INT: int = 5\\n\"\n            \"FLOAT = 0.001\\n\"\n            \"STR = 'abc'\\n\"\n            \"BOOL: bool = True\\n\"\n            \"DICT = {'a': 1}\\n\"\n            \"LIST = [1, 2, 3]\\n\"\n            \"SET = {4, 5, 6}\\n\"\n            \"TUPLE = (10, 100)\\n\"\n            \"NONE = None\\n\"\n        ),\n    )\n    dep = ParamsDependency(\n        Stage(dvc),\n        parameters_file,\n        [\n            \"INT\",\n            \"FLOAT\",\n            \"STR\",\n            \"BOOL\",\n            \"DICT\",\n            \"LIST\",\n            \"SET\",\n            \"TUPLE\",\n            \"NONE\",\n        ],\n    )\n    assert dep.read_params() == {\n        \"INT\": 5,\n        \"FLOAT\": 0.001,\n        \"STR\": \"abc\",\n        \"BOOL\": True,\n        
\"DICT\": {\"a\": 1},\n        \"LIST\": [1, 2, 3],\n        \"SET\": {4, 5, 6},\n        \"TUPLE\": (10, 100),\n        \"NONE\": None,\n    }\n\n    tmp_dir.gen(parameters_file, \"class Train:\\n    foo = 'val1'\\n    bar = 'val2'\\n\")\n    dep = ParamsDependency(Stage(dvc), parameters_file, [\"Train.foo\"])\n    assert dep.read_params() == {\"Train.foo\": \"val1\"}\n\n    dep = ParamsDependency(Stage(dvc), parameters_file, [\"Train\"])\n    assert dep.read_params() == {\"Train\": {\"foo\": \"val1\", \"bar\": \"val2\"}}\n\n    tmp_dir.gen(\n        parameters_file,\n        (\n            \"x = 4\\n\"\n            \"config.x = 3\\n\"\n            \"class Klass:\\n\"\n            \"    def __init__(self):\\n\"\n            \"        self.a = 'val1'\\n\"\n            \"        container.a = 2\\n\"\n            \"        self.container.a = 1\\n\"\n            \"        a = 'val2'\\n\"\n        ),\n    )\n    dep = ParamsDependency(Stage(dvc), parameters_file, [\"x\", \"Klass.a\"])\n    assert dep.read_params() == {\"x\": 4, \"Klass.a\": \"val1\"}\n\n\ndef test_params_py_tuple_status(tmp_dir, dvc):\n    \"\"\"https://github.com/treeverse/dvc/issues/8803\"\"\"\n    parameters_file = \"parameters.py\"\n    tmp_dir.gen(parameters_file, \"TUPLE = (10, 100)\\n\")\n    dep = ParamsDependency(Stage(dvc), parameters_file, [\"TUPLE\"])\n    # lock file uses YAML so the tuple will be loaded as a list\n    dep.fill_values({\"TUPLE\": [10, 100]})\n    assert dep.status() == {}\n    dep.fill_values({\"TUPLE\": [11, 100]})\n    assert dep.status() == {\"parameters.py\": {\"TUPLE\": \"modified\"}}\n    dep.fill_values({\"TUPLE\": [10]})\n    assert dep.status() == {\"parameters.py\": {\"TUPLE\": \"modified\"}}\n    dep.fill_values({\"TUPLE\": {10: \"foo\", 100: \"bar\"}})\n    assert dep.status() == {\"parameters.py\": {\"TUPLE\": \"modified\"}}\n\n\ndef test_get_hash_missing_config(dvc):\n    dep = ParamsDependency(Stage(dvc), None, [\"foo\"])\n    with 
pytest.raises(MissingParamsError):\n        dep.get_hash()\n\n\ndef test_get_hash_missing_param(tmp_dir, dvc):\n    tmp_dir.gen(DEFAULT_PARAMS_FILE, \"bar: baz\")\n    dep = ParamsDependency(Stage(dvc), None, [\"foo\"])\n    with pytest.raises(MissingParamsError):\n        dep.get_hash()\n\n\n@pytest.mark.parametrize(\"param_value\", [\"\", \"false\", \"[]\", \"{}\", \"null\"])\ndef test_params_with_false_values(tmp_dir, dvc, param_value):\n    \"\"\"These falsy params values should not be ignored by `status` on loading.\"\"\"\n    key = \"param\"\n    dep = ParamsDependency(Stage(dvc), DEFAULT_PARAMS_FILE, [key])\n    (tmp_dir / DEFAULT_PARAMS_FILE).write_text(f\"{key}: {param_value}\")\n\n    dep.fill_values(load_yaml(DEFAULT_PARAMS_FILE))\n\n    assert dep.status() == {}\n\n\ndef test_params_status_without_targets(tmp_dir, dvc):\n    params_file = tmp_dir / \"params.yaml\"\n    dep = ParamsDependency(Stage(dvc), str(params_file), [])\n\n    assert dep.hash_info.value is None\n    assert dep.status() == {\"params.yaml\": \"deleted\"}\n\n    params_file.dump({\"foo\": \"foo\", \"bar\": \"bar\"})\n\n    assert dep.status() == {\"params.yaml\": \"new\"}\n\n    dep.fill_values({})\n    assert dep.hash_info.value == {}\n    assert dep.status() == {\"params.yaml\": {\"bar\": \"new\", \"foo\": \"new\"}}\n\n    dep.fill_values({\"foo\": \"foobar\", \"lorem\": \"ipsum\"})\n    assert dep.hash_info.value == {\"foo\": \"foobar\", \"lorem\": \"ipsum\"}\n    assert dep.status() == {\n        \"params.yaml\": {\"bar\": \"new\", \"foo\": \"modified\", \"lorem\": \"deleted\"}\n    }\n"
  },
  {
    "path": "tests/unit/fs/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/fs/test_base.py",
    "content": "import pytest\n\nfrom dvc.fs import FileSystem, RemoteMissingDepsError\n\n\ndef test_missing_deps(mocker):\n    requires = {\"missing\": \"missing\"}\n    mocker.patch.object(FileSystem, \"REQUIRES\", requires)\n    with pytest.raises(RemoteMissingDepsError, match=\"missing dependencies\"):\n        FileSystem()\n"
  },
  {
    "path": "tests/unit/fs/test_data.py",
    "content": "import posixpath\nimport shutil\n\nimport pytest\n\nimport dvc_data\nfrom dvc.fs import localfs\nfrom dvc.fs.data import DataFileSystem\nfrom dvc.utils.fs import remove\nfrom dvc_data.hashfile.build import build\nfrom dvc_data.hashfile.hash_info import HashInfo\n\n\n@pytest.mark.parametrize(\n    \"path, key\",\n    [\n        (\"\", ()),\n        (\".\", ()),\n        (\"/\", ()),\n        (\"foo\", (\"foo\",)),\n        (\"dir/foo\", (\"dir\", \"foo\")),\n    ],\n)\ndef test_get_key(tmp_dir, dvc, path, key):\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    assert fs.fs._get_key(path) == key\n\n\ndef test_exists(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.add(\"foo\")\n    (tmp_dir / \"foo\").unlink()\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    assert fs.exists(\"foo\")\n\n\ndef test_open(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.add(\"foo\")\n    (tmp_dir / \"foo\").unlink()\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    with fs.open(\"foo\", \"r\") as fobj:\n        assert fobj.read() == \"foo\"\n\n\ndef test_open_dirty_hash(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"file\", \"file\")\n    (tmp_dir / \"file\").write_text(\"something\")\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    with fs.open(\"file\", \"r\") as fobj:\n        # NOTE: Unlike DVCFileSystem, DataFileSystem should not\n        # be affected by a dirty workspace.\n        assert fobj.read() == \"file\"\n\n\ndef test_open_no_remote(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"file\", \"file\")\n    (tmp_dir / \"file\").unlink()\n    remove(dvc.cache.local.path)\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    with pytest.raises(FileNotFoundError):\n        with fs.open(\"file\", \"r\"):\n            pass\n\n\ndef test_open_dirty_no_hash(tmp_dir, dvc):\n    tmp_dir.gen(\"file\", \"file\")\n    (tmp_dir / \"file.dvc\").write_text(\"outs:\\n- path: file\\n\")\n\n    fs = 
DataFileSystem(index=dvc.index.data[\"repo\"])\n    # NOTE: Unlike DVCFileSystem, DataFileSystem should not\n    # be affected by a dirty workspace.\n    with pytest.raises(FileNotFoundError):\n        with fs.open(\"file\", \"r\"):\n            pass\n\n\ndef test_open_in_history(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.add(\"foo\")\n    dvc.scm.add([\"foo.dvc\", \".gitignore\"])\n    dvc.scm.commit(\"foo\")\n\n    tmp_dir.gen(\"foo\", \"foofoo\")\n    dvc.add(\"foo\")\n    dvc.scm.add([\"foo.dvc\", \".gitignore\"])\n    dvc.scm.commit(\"foofoo\")\n\n    with dvc.switch(\"HEAD~1\"):\n        fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n        with fs.open(\"foo\", \"r\") as fobj:\n            assert fobj.read() == \"foo\"\n\n\ndef test_isdir_isfile(tmp_dir, dvc):\n    tmp_dir.gen({\"datafile\": \"data\", \"datadir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    assert not fs.isdir(\"datadir\")\n    assert not fs.isfile(\"datadir\")\n    assert not fs.isdir(\"datafile\")\n    assert not fs.isfile(\"datafile\")\n\n    dvc.add([\"datadir\", \"datafile\"])\n    shutil.rmtree(tmp_dir / \"datadir\")\n    (tmp_dir / \"datafile\").unlink()\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    assert fs.isdir(\"datadir\")\n    assert not fs.isfile(\"datadir\")\n    assert not fs.isdir(\"datafile\")\n    assert fs.isfile(\"datafile\")\n\n\ndef test_isdir_mixed(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    dvc.add(str(tmp_dir / \"dir\" / \"foo\"))\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    assert fs.isdir(\"dir\")\n    assert not fs.isfile(\"dir\")\n\n\ndef test_walk(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"subdir1\": {\"foo1\": \"foo1\", \"bar1\": \"bar1\"},\n                \"subdir2\": {\"foo2\": \"foo2\"},\n                \"foo\": \"foo\",\n                \"bar\": 
\"bar\",\n            }\n        }\n    )\n\n    dvc.add(localfs.find(\"dir\"))\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n\n    expected = [\n        \"dir/subdir1\",\n        \"dir/subdir2\",\n        \"dir/subdir1/foo1\",\n        \"dir/subdir1/bar1\",\n        \"dir/subdir2/foo2\",\n        \"dir/foo\",\n        \"dir/bar\",\n    ]\n\n    actual = []\n    for root, dirs, files in fs.walk(\"dir\"):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n\ndef test_walk_dir(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"subdir1\": {\"foo1\": \"foo1\", \"bar1\": \"bar1\"},\n                \"subdir2\": {\"foo2\": \"foo2\"},\n                \"foo\": \"foo\",\n                \"bar\": \"bar\",\n            }\n        }\n    )\n\n    dvc.add(\"dir\")\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n\n    expected = [\n        \"dir/subdir1\",\n        \"dir/subdir2\",\n        \"dir/subdir1/foo1\",\n        \"dir/subdir1/bar1\",\n        \"dir/subdir2/foo2\",\n        \"dir/foo\",\n        \"dir/bar\",\n    ]\n\n    actual = []\n    for root, dirs, files in fs.walk(\"dir\"):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n\ndef test_walk_missing(tmp_dir, dvc):\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n\n    for _ in fs.walk(\"dir\"):\n        pass\n\n\ndef test_walk_not_a_dir(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n\n    for _ in fs.walk(\"foo\"):\n        pass\n\n\ndef test_get_hash_file(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"foo\": \"foo\"})\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    assert fs.info(\"foo\")[\"md5\"] == 
\"acbd18db4cc2f85cedef654fccc4a4d8\"\n\n\ndef test_get_hash_dir(tmp_dir, dvc, mocker):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\", \"subdir\": {\"data\": \"data\"}}})\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    hash_file_spy = mocker.spy(dvc_data.hashfile.hash, \"hash_file\")\n    assert fs.info(\"dir\")[\"md5\"] == \"8761c4e9acad696bee718615e23e22db.dir\"\n    assert not hash_file_spy.called\n\n\ndef test_get_hash_granular(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\", \"subdir\": {\"data\": \"data\"}}})\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    subdir = \"dir/subdir\"\n    assert fs.info(subdir).get(\"md5\") is None\n    _, _, obj = build(dvc.cache.local, subdir, fs, \"md5\", dry_run=True)\n    assert obj.hash_info == HashInfo(\"md5\", \"af314506f1622d107e0ed3f14ec1a3b5.dir\")\n    data = posixpath.join(subdir, \"data\")\n    assert fs.info(data)[\"md5\"] == \"8d777f385d3dfec8815d20f7496026dc\"\n    _, _, obj = build(dvc.cache.local, data, fs, \"md5\", dry_run=True)\n    assert obj.hash_info == HashInfo(\"md5\", \"8d777f385d3dfec8815d20f7496026dc\")\n\n\ndef test_get_hash_dirty_file(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"file\", \"file\")\n    (tmp_dir / \"file\").write_text(\"something\")\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    expected = \"8c7dd922ad47494fc02c388e12c00eac\"\n    assert fs.info(\"file\").get(\"md5\") == expected\n    _, _, obj = build(dvc.cache.local, \"file\", fs, \"md5\", dry_run=True)\n    assert obj.hash_info == HashInfo(\"md5\", expected)\n\n\ndef test_get_hash_dirty_dir(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    (tmp_dir / \"dir\" / \"baz\").write_text(\"baz\")\n\n    fs = DataFileSystem(index=dvc.index.data[\"repo\"])\n    expected = \"5ea40360f5b4ec688df672a4db9c17d1.dir\"\n    assert fs.info(\"dir\").get(\"md5\") == expected\n    _, _, obj = build(dvc.cache.local, 
\"dir\", fs, \"md5\", dry_run=True)\n    assert obj.hash_info == HashInfo(\"md5\", expected)\n"
  },
  {
    "path": "tests/unit/fs/test_dvc.py",
    "content": "import os\nimport posixpath\nimport shutil\n\nimport pytest\nfrom fsspec.utils import tokenize\n\nfrom dvc.fs import localfs\nfrom dvc.fs.dvc import DVCFileSystem\nfrom dvc.testing.tmp_dir import make_subrepo\nfrom dvc_data.hashfile.build import build\nfrom dvc_data.hashfile.hash_info import HashInfo\n\n\ndef test_exists(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.add(\"foo\")\n    (tmp_dir / \"foo\").unlink()\n\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.exists(\"foo\")\n\n\ndef test_open(tmp_dir, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.add(\"foo\")\n    (tmp_dir / \"foo\").unlink()\n\n    fs = DVCFileSystem(repo=dvc)\n    with fs.open(\"foo\", \"r\") as fobj:\n        assert fobj.read() == \"foo\"\n\n\ndef test_open_dirty_hash(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"file\", \"file\")\n    (tmp_dir / \"file\").write_text(\"something\")\n\n    fs = DVCFileSystem(repo=dvc)\n    with fs.open(\"file\", \"r\") as fobj:\n        assert fobj.read() == \"something\"\n\n\ndef test_open_dirty_no_hash(tmp_dir, dvc):\n    tmp_dir.gen(\"file\", \"file\")\n    (tmp_dir / \"file.dvc\").write_text(\"outs:\\n- path: file\\n\")\n\n    fs = DVCFileSystem(repo=dvc)\n    with fs.open(\"file\", \"r\") as fobj:\n        assert fobj.read() == \"file\"\n\n\ndef test_open_in_history(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"foo\", \"foo\")\n    dvc.add(\"foo\")\n    dvc.scm.add([\"foo.dvc\", \".gitignore\"])\n    dvc.scm.commit(\"foo\")\n\n    tmp_dir.gen(\"foo\", \"foofoo\")\n    dvc.add(\"foo\")\n    dvc.scm.add([\"foo.dvc\", \".gitignore\"])\n    dvc.scm.commit(\"foofoo\")\n\n    with dvc.switch(\"HEAD~1\"):\n        fs = DVCFileSystem(repo=dvc)\n        with fs.open(\"foo\", \"r\") as fobj:\n            assert fobj.read() == \"foo\"\n\n\ndef test_isdir_isfile(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"datafile\": \"data\",\n            \"datadir\": {\n                \"foo\": \"foo\",\n                \"bar\": \"bar\",\n          
  },\n            \"subdir\": {\n                \"baz\": \"baz\",\n                \"data\": {\n                    \"abc\": \"abc\",\n                    \"xyz\": \"xyz\",\n                },\n            },\n        },\n    )\n\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.isdir(\"datadir\")\n    assert not fs.isfile(\"datadir\")\n    assert not fs.isdvc(\"datadir\")\n    assert not fs.isdir(\"datafile\")\n    assert fs.isfile(\"datafile\")\n    assert not fs.isdvc(\"datafile\")\n\n    dvc.add(\n        [\n            \"datadir\",\n            \"datafile\",\n            os.path.join(\"subdir\", \"baz\"),\n            os.path.join(\"subdir\", \"data\"),\n        ]\n    )\n    shutil.rmtree(tmp_dir / \"datadir\")\n    shutil.rmtree(tmp_dir / \"subdir\" / \"data\")\n    (tmp_dir / \"datafile\").unlink()\n    (tmp_dir / \"subdir\" / \"baz\").unlink()\n\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.isdir(\"datadir\")\n    assert not fs.isfile(\"datadir\")\n    assert fs.isdvc(\"datadir\")\n    assert not fs.isdir(\"datafile\")\n    assert fs.isfile(\"datafile\")\n    assert fs.isdvc(\"datafile\")\n\n    assert fs.isdir(\"subdir\")\n    assert not fs.isfile(\"subdir\")\n    assert not fs.isdvc(\"subdir\")\n    assert fs.isfile(\"subdir/baz\")\n    assert fs.isdir(\"subdir/data\")\n\n\ndef test_exists_isdir_isfile_dirty(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"datafile\": \"data\", \"datadir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    fs = DVCFileSystem(repo=dvc)\n    shutil.rmtree(tmp_dir / \"datadir\")\n    (tmp_dir / \"datafile\").unlink()\n\n    assert fs.exists(\"datafile\")\n    assert fs.exists(\"datadir\")\n    assert fs.exists(\"datadir/foo\")\n    assert fs.isfile(\"datafile\")\n    assert not fs.isfile(\"datadir\")\n    assert fs.isfile(\"datadir/foo\")\n    assert not fs.isdir(\"datafile\")\n    assert fs.isdir(\"datadir\")\n    assert not fs.isdir(\"datadir/foo\")\n\n    # NOTE: creating file instead of dir and dir instead of file\n    
tmp_dir.gen({\"datadir\": \"data\", \"datafile\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    assert fs.exists(\"datafile\")\n    assert fs.exists(\"datadir\")\n    assert not fs.exists(\"datadir/foo\")\n    assert fs.exists(\"datafile/foo\")\n    assert not fs.isfile(\"datafile\")\n    assert fs.isfile(\"datadir\")\n    assert not fs.isfile(\"datadir/foo\")\n    assert fs.isfile(\"datafile/foo\")\n    assert fs.isdir(\"datafile\")\n    assert not fs.isdir(\"datadir\")\n    assert not fs.isdir(\"datadir/foo\")\n    assert not fs.isdir(\"datafile/foo\")\n\n\ndef test_isdir_mixed(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    dvc.add(str(tmp_dir / \"dir\" / \"foo\"))\n\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.isdir(\"dir\")\n    assert not fs.isfile(\"dir\")\n\n\ndef test_ls_dirty(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"data\": \"data\"})\n    (tmp_dir / \"data\").unlink()\n\n    tmp_dir.gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n\n    fs = DVCFileSystem(repo=dvc)\n    assert set(fs.ls(\"data\")) == {\"data/foo\", \"data/bar\"}\n\n\ndef test_ls_file_not_found(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"data\": \"data\"})\n\n    fs = DVCFileSystem(repo=dvc)\n    with pytest.raises(FileNotFoundError):\n        fs.ls(\"missing\")\n\n\ndef test_ls_dir_empty(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"data\": \"data\"})\n    empty = tmp_dir / \"empty\"\n    empty.mkdir()\n\n    fs = DVCFileSystem(repo=dvc)\n    assert set(fs.ls(\"empty\")) == set()\n\n\n@pytest.mark.parametrize(\n    \"dvcfiles,extra_expected\",\n    [\n        (False, []),\n        (\n            True,\n            [\n                \"dir/subdir1/foo1.dvc\",\n                \"dir/subdir1/bar1.dvc\",\n                \"dir/subdir2/foo2.dvc\",\n            ],\n        ),\n    ],\n)\ndef test_walk(tmp_dir, dvc, dvcfiles, extra_expected):\n    tmp_dir.gen(\n        {\n            \"dir\": {\n                \"subdir1\": {\"foo1\": \"foo1\", \"bar1\": 
\"bar1\"},\n                \"subdir2\": {\"foo2\": \"foo2\"},\n            }\n        }\n    )\n    dvc.add(localfs.find(\"dir\"))\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    fs = DVCFileSystem(repo=dvc)\n\n    expected = [\n        \"dir/subdir1\",\n        \"dir/subdir2\",\n        \"dir/subdir1/foo1\",\n        \"dir/subdir1/bar1\",\n        \"dir/subdir2/foo2\",\n        \"dir/foo\",\n        \"dir/bar\",\n    ]\n\n    actual = []\n    for root, dirs, files in fs.walk(\"dir\", dvcfiles=dvcfiles):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    expected += extra_expected\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n\ndef test_walk_dirty(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\n        {\n            \"dir\": {\n                \"foo\": \"foo\",\n                \"subdir1\": {\"foo1\": \"foo1\", \"bar1\": \"bar1\"},\n                \"subdir2\": {\"foo2\": \"foo2\"},\n            }\n        }\n    )\n    tmp_dir.gen({\"dir\": {\"bar\": \"bar\", \"subdir3\": {\"foo3\": \"foo3\"}}})\n    (tmp_dir / \"dir\" / \"foo\").unlink()\n\n    fs = DVCFileSystem(repo=dvc)\n    expected = [\n        \"dir/subdir1\",\n        \"dir/subdir2\",\n        \"dir/subdir3\",\n        \"dir/subdir1/foo1\",\n        \"dir/subdir1/bar1\",\n        \"dir/subdir2/foo2\",\n        \"dir/subdir3/foo3\",\n        \"dir/bar\",\n        \"dir/foo\",\n    ]\n\n    actual = []\n    for root, dirs, files in fs.walk(\"dir\"):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n\ndef test_walk_dirty_cached_dir(tmp_dir, scm, dvc):\n    tmp_dir.dvc_gen({\"data\": {\"foo\": \"foo\", \"bar\": \"bar\"}}, commit=\"add data\")\n    (tmp_dir / \"data\" / \"foo\").unlink()\n\n    fs = DVCFileSystem(repo=dvc)\n\n    actual = []\n    for root, dirs, files in 
fs.walk(\"data\"):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    expected = [\"data/foo\", \"data/bar\"]\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n\ndef test_walk_mixed_dir(tmp_dir, scm, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.dvc.add(os.path.join(\"dir\", \"foo\"))\n    tmp_dir.scm.add(\n        [\n            os.path.join(\"dir\", \"bar\"),\n            os.path.join(\"dir\", \".gitignore\"),\n            os.path.join(\"dir\", \"foo.dvc\"),\n        ]\n    )\n    tmp_dir.scm.commit(\"add dir\")\n\n    fs = DVCFileSystem(repo=dvc)\n\n    expected = [\"dir/foo\", \"dir/bar\", \"dir/.gitignore\"]\n    actual = []\n    for root, dirs, files in fs.walk(\"dir\"):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n\ndef test_walk_missing(tmp_dir, dvc):\n    fs = DVCFileSystem(repo=dvc)\n\n    for _ in fs.walk(\"dir\"):\n        pass\n\n\ndef test_walk_not_a_dir(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    fs = DVCFileSystem(repo=dvc)\n\n    for _ in fs.walk(\"foo\"):\n        pass\n\n\ndef test_isdvc(tmp_dir, dvc):\n    tmp_dir.gen({\"foo\": \"foo\", \"bar\": \"bar\", \"dir\": {\"baz\": \"baz\"}})\n    dvc.add(\"foo\")\n    dvc.add(\"dir\")\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.isdvc(\"foo\")\n    assert not fs.isdvc(\"bar\")\n    assert fs.isdvc(\"dir\")\n    assert fs.isdvc(\"dir/baz\")\n    assert fs.isdvc(\"dir/baz\", recursive=True)\n\n\ndef test_subrepos(tmp_dir, scm, dvc, mocker):\n    tmp_dir.scm_gen(\n        {\"dir\": {\"repo.txt\": \"file to confuse DVCFileSystem\"}},\n        commit=\"dir/repo.txt\",\n    )\n\n    subrepo1 = tmp_dir / \"dir\" / \"repo\"\n    subrepo2 = tmp_dir / \"dir\" / \"repo2\"\n\n    for repo in [subrepo1, subrepo2]:\n        make_subrepo(repo, 
scm)\n\n    with subrepo1.chdir():\n        subrepo1.dvc_gen({\"foo\": \"foo\", \"dir1\": {\"bar\": \"bar\"}}, commit=\"FOO\")\n    with subrepo2.chdir():\n        subrepo2.dvc_gen({\"lorem\": \"lorem\", \"dir2\": {\"ipsum\": \"ipsum\"}}, commit=\"BAR\")\n\n    dvc._reset()\n    fs = DVCFileSystem(repo=dvc, subrepos=True)\n\n    def assert_fs_belongs_to_repo(ret_val):\n        method = fs.fs._get_repo\n\n        def f(*args, **kwargs):\n            r = method(*args, **kwargs)\n            assert r.root_dir == ret_val.root_dir\n            return r\n\n        return f\n\n    mock_subrepo1 = mocker.patch.object(\n        fs.fs, \"_get_repo\", side_effect=assert_fs_belongs_to_repo(subrepo1.dvc)\n    )\n    assert fs.exists(\"dir/repo/foo\") is True\n    assert fs.exists(\"dir/repo/bar\") is False\n\n    assert fs.isfile(\"dir/repo/foo\") is True\n    assert fs.isfile(\"dir/repo/dir1/bar\") is True\n    assert fs.isfile(\"dir/repo/dir1\") is False\n\n    assert fs.isdir(\"dir/repo/dir1\") is True\n    assert fs.isdir(\"dir/repo/dir1/bar\") is False\n    assert fs.isdvc(\"dir/repo/foo\") is True\n    mocker.stop(mock_subrepo1)\n\n    mock_subrepo2 = mocker.patch.object(\n        fs.fs, \"_get_repo\", side_effect=assert_fs_belongs_to_repo(subrepo2.dvc)\n    )\n    assert fs.exists(\"dir/repo2/lorem\") is True\n    assert fs.exists(\"dir/repo2/ipsum\") is False\n\n    assert fs.isfile(\"dir/repo2/lorem\") is True\n    assert fs.isfile(\"dir/repo2/dir2/ipsum\") is True\n    assert fs.isfile(\"dir/repo2/dir2\") is False\n\n    assert fs.isdir(\"dir/repo2/dir2\") is True\n    assert fs.isdir(\"dir/repo2/dir2/ipsum\") is False\n    assert fs.isdvc(\"dir/repo2/lorem\") is True\n    mocker.stop(mock_subrepo2)\n\n\n@pytest.mark.parametrize(\n    \"dvcfiles,extra_expected\",\n    [\n        (False, []),\n        (\n            True,\n            [\n                \"dir/repo/foo.dvc\",\n                \"dir/repo/.dvcignore\",\n                \"dir/repo/dir1.dvc\",\n             
   \"dir/repo2/.dvcignore\",\n                \"dir/repo2/lorem.dvc\",\n                \"dir/repo2/dir2.dvc\",\n            ],\n        ),\n    ],\n)\ndef test_subrepo_walk(tmp_dir, scm, dvc, dvcfiles, extra_expected):\n    tmp_dir.scm_gen(\n        {\"dir\": {\"repo.txt\": \"file to confuse DVCFileSystem\"}},\n        commit=\"dir/repo.txt\",\n    )\n\n    subrepo1 = tmp_dir / \"dir\" / \"repo\"\n    subrepo2 = tmp_dir / \"dir\" / \"repo2\"\n\n    subdirs = [subrepo1, subrepo2]\n    for dir_ in subdirs:\n        make_subrepo(dir_, scm)\n\n    with subrepo1.chdir():\n        subrepo1.dvc_gen({\"foo\": \"foo\", \"dir1\": {\"bar\": \"bar\"}}, commit=\"FOO\")\n    with subrepo2.chdir():\n        subrepo2.dvc_gen({\"lorem\": \"lorem\", \"dir2\": {\"ipsum\": \"ipsum\"}}, commit=\"BAR\")\n\n    # using fs that does not have dvcignore\n    dvc._reset()\n    fs = DVCFileSystem(repo=dvc)\n    expected = [\n        \"dir/repo\",\n        \"dir/repo.txt\",\n        \"dir/repo2\",\n        \"dir/repo/.gitignore\",\n        \"dir/repo/foo\",\n        \"dir/repo/dir1\",\n        \"dir/repo/dir1/bar\",\n        \"dir/repo2/.gitignore\",\n        \"dir/repo2/lorem\",\n        \"dir/repo2/dir2\",\n        \"dir/repo2/dir2/ipsum\",\n    ]\n\n    actual = []\n    for root, dirs, files in fs.walk(\"dir\", dvcfiles=dvcfiles, ignore_subrepos=False):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    expected += extra_expected\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n\ndef test_dvcfs_no_subrepos(tmp_dir, dvc, scm):\n    tmp_dir.scm_gen(\n        {\"dir\": {\"repo.txt\": \"file to confuse DVCFileSystem\"}},\n        commit=\"dir/repo.txt\",\n    )\n    tmp_dir.dvc_gen({\"lorem\": \"lorem\"}, commit=\"add foo\")\n\n    subrepo = tmp_dir / \"dir\" / \"repo\"\n    make_subrepo(subrepo, scm)\n    with subrepo.chdir():\n        subrepo.dvc_gen({\"foo\": \"foo\", \"dir1\": {\"bar\": \"bar\"}}, 
commit=\"FOO\")\n        subrepo.scm_gen({\"ipsum\": \"ipsum\"}, commit=\"BAR\")\n\n    # using fs that does not have dvcignore\n    dvc._reset()\n    fs = DVCFileSystem(repo=dvc)\n    expected = [\n        \"/.dvcignore\",\n        \"/.gitignore\",\n        \"/lorem\",\n        \"/lorem.dvc\",\n        \"/dir\",\n        \"/dir/repo.txt\",\n    ]\n\n    actual = []\n    for root, dirs, files in fs.walk(\"/\", dvcfiles=True):\n        for entry in dirs + files:\n            actual.append(posixpath.join(root, entry))\n\n    assert set(actual) == set(expected)\n    assert len(actual) == len(expected)\n\n    assert fs.isfile(\"lorem\") is True\n    assert fs.isfile(\"dir/repo/foo\") is False\n    assert fs.isdir(\"dir/repo\") is False\n    assert fs.isdir(\"dir\") is True\n\n    assert fs.isdvc(\"lorem\") is True\n    assert fs.isdvc(\"dir/repo/dir1\") is False\n\n    assert fs.exists(\"dir/repo.txt\") is True\n    assert fs.exists(\"repo/ipsum\") is False\n\n\ndef test_get_hash_cached_file(tmp_dir, dvc, mocker):\n    tmp_dir.dvc_gen({\"foo\": \"foo\"})\n    fs = DVCFileSystem(repo=dvc)\n    expected = \"acbd18db4cc2f85cedef654fccc4a4d8\"\n    assert fs.info(\"foo\").get(\"md5\") is None\n    _, _, obj = build(dvc.cache.local, \"foo\", fs, \"md5\")\n    assert obj.hash_info == HashInfo(\"md5\", expected)\n    (tmp_dir / \"foo\").unlink()\n    assert fs.info(\"foo\")[\"md5\"] == expected\n\n\ndef test_get_hash_cached_dir(tmp_dir, dvc, mocker):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\", \"subdir\": {\"data\": \"data\"}}})\n    fs = DVCFileSystem(repo=dvc)\n    expected = \"8761c4e9acad696bee718615e23e22db.dir\"\n    assert fs.info(\"dir\").get(\"md5\") is None\n    _, _, obj = build(dvc.cache.local, \"dir\", fs, \"md5\")\n    assert obj.hash_info == HashInfo(\"md5\", \"8761c4e9acad696bee718615e23e22db.dir\")\n\n    shutil.rmtree(tmp_dir / \"dir\")\n    assert fs.info(\"dir\")[\"md5\"] == expected\n    _, _, obj = build(dvc.cache.local, \"dir\", 
fs, \"md5\")\n    assert obj.hash_info == HashInfo(\"md5\", \"8761c4e9acad696bee718615e23e22db.dir\")\n\n\ndef test_get_hash_cached_granular(tmp_dir, dvc, mocker):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\", \"subdir\": {\"data\": \"data\"}}})\n    fs = DVCFileSystem(repo=dvc)\n    subdir = \"dir/subdir\"\n    assert fs.info(subdir).get(\"md5\") is None\n    _, _, obj = build(dvc.cache.local, subdir, fs, \"md5\")\n    assert obj.hash_info == HashInfo(\"md5\", \"af314506f1622d107e0ed3f14ec1a3b5.dir\")\n    assert fs.info(posixpath.join(subdir, \"data\")).get(\"md5\") is None\n    _, _, obj = build(dvc.cache.local, posixpath.join(subdir, \"data\"), fs, \"md5\")\n    assert obj.hash_info == HashInfo(\"md5\", \"8d777f385d3dfec8815d20f7496026dc\")\n    (tmp_dir / \"dir\" / \"subdir\" / \"data\").unlink()\n    assert (\n        fs.info(posixpath.join(subdir, \"data\"))[\"md5\"]\n        == \"8d777f385d3dfec8815d20f7496026dc\"\n    )\n\n\ndef test_get_hash_mixed_dir(tmp_dir, scm, dvc):\n    tmp_dir.gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    tmp_dir.dvc.add(os.path.join(\"dir\", \"foo\"))\n    tmp_dir.scm.add(\n        [\n            os.path.join(\"dir\", \"bar\"),\n            os.path.join(\"dir\", \".gitignore\"),\n            os.path.join(\"dir\", \"foo.dvc\"),\n        ]\n    )\n    tmp_dir.scm.commit(\"add dir\")\n\n    fs = DVCFileSystem(repo=dvc)\n    _, _, obj = build(dvc.cache.local, \"dir\", fs, \"md5\")\n    if os.name == \"nt\":\n        expected_hash = \"0d2086760aea091f1504eafc8843bb18.dir\"\n    else:\n        expected_hash = \"e1d9e8eae5374860ae025ec84cfd85c7.dir\"\n    assert obj.hash_info == HashInfo(\"md5\", expected_hash)\n\n\ndef test_get_hash_dirty_file(tmp_dir, dvc):\n    from dvc_data.hashfile import check\n    from dvc_data.hashfile.hash import hash_file\n\n    tmp_dir.dvc_gen(\"file\", \"file\")\n    file_hash_info = HashInfo(\"md5\", \"8c7dd922ad47494fc02c388e12c00eac\")\n\n    (tmp_dir / 
\"file\").write_text(\"something\")\n    something_hash_info = HashInfo(\"md5\", \"437b930db84b8079c2dd804a71936b5f\")\n\n    # file is modified in workspace\n    # hash_file(file) should return workspace hash, not DVC cached hash\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.info(\"file\").get(\"md5\") is None\n    staging, _, obj = build(dvc.cache.local, \"file\", fs, \"md5\")\n    assert obj.hash_info == something_hash_info\n    check(staging, obj)\n\n    # hash_file(file) should return DVC cached hash\n    (tmp_dir / \"file\").unlink()\n    assert fs.info(\"file\")[\"md5\"] == file_hash_info.value\n    _, hash_info = hash_file(\"file\", fs, \"md5\", state=dvc.state)\n    assert hash_info == file_hash_info\n\n    # tmp_dir/file can be built even though it is missing in workspace since\n    # repofs will use the DVC cached hash (and refer to the local cache object)\n    _, _, obj = build(dvc.cache.local, \"file\", fs, \"md5\")\n    assert obj.hash_info == file_hash_info\n\n\ndef test_get_hash_dirty_dir(tmp_dir, dvc):\n    tmp_dir.dvc_gen({\"dir\": {\"foo\": \"foo\", \"bar\": \"bar\"}})\n    (tmp_dir / \"dir\" / \"baz\").write_text(\"baz\")\n\n    fs = DVCFileSystem(repo=dvc)\n    _, meta, obj = build(dvc.cache.local, \"dir\", fs, \"md5\")\n    assert obj.hash_info == HashInfo(\"md5\", \"ba75a2162ca9c29acecb7957105a0bc2.dir\")\n    assert meta.nfiles == 3\n\n\n@pytest.mark.parametrize(\"traverse_subrepos\", [True, False])\ndef test_walk_nested_subrepos(tmp_dir, dvc, scm, traverse_subrepos):\n    # generate a dvc and fs structure, with suffix based on repo's basename\n    def fs_structure(suffix):\n        return {\n            f\"foo-{suffix}\": f\"foo-{suffix}\",\n            f\"dir-{suffix}\": {f\"bar-{suffix}\": f\"bar-{suffix}\"},\n        }\n\n    def dvc_structure(suffix):\n        return {\n            f\"lorem-{suffix}\": f\"lorem-{suffix}\",\n            f\"dvc-{suffix}\": {f\"ipsum-{suffix}\": f\"ipsum-{suffix}\"},\n        }\n\n    paths = 
[\"subrepo1\", \"subrepo2\", os.path.join(\"subrepo1\", \"subrepo3\")]\n    subrepos = [tmp_dir / path for path in paths]\n    for repo_dir in subrepos:\n        make_subrepo(repo_dir, scm)\n\n    extras = {\".gitignore\"}  # these files are always there\n    expected = {}\n    for repo_dir in [*subrepos, tmp_dir]:\n        base = os.path.basename(repo_dir)\n        scm_files = fs_structure(base)\n        dvc_files = dvc_structure(base)\n        with repo_dir.chdir():\n            repo_dir.scm_gen(scm_files, commit=f\"git add in {repo_dir}\")\n            repo_dir.dvc_gen(dvc_files, commit=f\"dvc add in {repo_dir}\")\n\n        if traverse_subrepos or repo_dir == tmp_dir:\n            repo_dir_path = (\n                \"/\" + repo_dir.relative_to(tmp_dir).as_posix()\n                if repo_dir != tmp_dir\n                else \"/\"\n            )\n            expected[repo_dir_path] = set(scm_files.keys() | dvc_files.keys() | extras)\n            # files inside a dvc directory\n            expected[posixpath.join(repo_dir_path, f\"dvc-{base}\")] = {f\"ipsum-{base}\"}\n            # files inside a git directory\n            expected[posixpath.join(repo_dir_path, f\"dir-{base}\")] = {f\"bar-{base}\"}\n\n    if traverse_subrepos:\n        # update subrepos\n        expected[\"/\"].update([\"subrepo1\", \"subrepo2\"])\n        expected[\"/subrepo1\"].add(\"subrepo3\")\n\n    actual = {}\n    fs = DVCFileSystem(repo=dvc)\n    for root, dirs, files in fs.walk(\"/\", ignore_subrepos=not traverse_subrepos):\n        actual[root] = set(dirs + files)\n    assert expected == actual\n\n\ndef test_fsid_noscm(tmp_dir, dvc):\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.fsid == \"dvcfs_\" + tokenize(dvc.root_dir, None)\n\n\ndef test_fsid(tmp_dir, dvc, scm):\n    fs = DVCFileSystem(repo=dvc)\n    assert fs.fsid == \"dvcfs_\" + tokenize(dvc.root_dir, scm.get_rev())\n    old_fsid = fs.fsid\n\n    tmp_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"foo\")\n    fs = 
DVCFileSystem(repo=dvc)\n    assert fs.fsid != old_fsid\n    assert fs.fsid == \"dvcfs_\" + tokenize(dvc.root_dir, scm.get_rev())\n\n\ndef test_fsid_url(erepo_dir):\n    from dvc.repo import Repo\n\n    url = f\"file://{erepo_dir.as_posix()}\"\n    with Repo.open(url) as dvc:\n        fs = DVCFileSystem(repo=dvc)\n        assert fs.fsid == \"dvcfs_\" + tokenize(url, erepo_dir.scm.get_rev())\n        old_fsid = fs.fsid\n\n    with erepo_dir.chdir():\n        erepo_dir.dvc_gen({\"foo\": \"foo\"}, commit=\"foo\")\n\n    with Repo.open(url) as dvc:\n        fs = DVCFileSystem(repo=dvc)\n        assert fs.fsid != old_fsid\n        assert fs.fsid == \"dvcfs_\" + tokenize(url, erepo_dir.scm.get_rev())\n\n\n@pytest.mark.parametrize(\n    \"fs_kwargs\",\n    [\n        lambda tmp_dir, dvc: {},  # noqa: ARG005\n        lambda tmp_dir, dvc: {\"repo\": tmp_dir},  # noqa: ARG005\n        lambda tmp_dir, dvc: {\"repo\": os.fspath(tmp_dir)},  # noqa: ARG005\n        lambda tmp_dir, dvc: {\"url\": tmp_dir},  # noqa: ARG005\n        lambda tmp_dir, dvc: {\"url\": os.fspath(tmp_dir)},  # noqa: ARG005\n        lambda tmp_dir, dvc: {\"repo\": dvc},  # noqa: ARG005\n    ],\n)\ndef test_init_arg(tmp_dir, dvc, fs_kwargs):\n    fs = DVCFileSystem(**fs_kwargs(tmp_dir, dvc))\n\n    assert fs.repo.root_dir == dvc.root_dir\n"
  },
  {
    "path": "tests/unit/fs/test_dvc_info.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.fs.dvc import DVCFileSystem\nfrom dvc.testing.tmp_dir import make_subrepo\n\n\n@pytest.fixture\ndef dvcfs(tmp_dir, dvc, scm):\n    fs_structure = {\n        \"models\": {  # mixed dvc + git directory\n            \"train.py\": \"train dot py\",\n            \"test.py\": \"test dot py\",\n        },\n        \"README.md\": \"my little project\",  # file\n        \"src\": {  # repo-only directory\n            \"utils\": {\n                \"__init__.py\": \"\",\n                \"serve_model.py\": \"# this will serve a model `soon`\",\n            }\n        },\n    }\n    dvc_structure = {\n        \"data\": {  # dvc only directory\n            \"raw\": {\n                \"raw-1.csv\": \"one, dot, csv\",\n                \"raw-2.csv\": \"two, dot, csv\",\n            },\n            \"processed\": {\n                \"processed-1.csv\": \"1, dot, csv\",\n                \"processed-2.csv\": \"2, dot, csv\",\n            },\n        },\n        os.path.join(\"models\", \"transform.pickle\"): \"model model\",  # file\n    }\n\n    tmp_dir.scm_gen(fs_structure, commit=\"repo init\")\n    tmp_dir.dvc_gen(dvc_structure, commit=\"use dvc\")\n\n    return DVCFileSystem(repo=dvc, subrepos=True)\n\n\ndef test_info_not_existing(dvcfs):\n    with pytest.raises(FileNotFoundError):\n        dvcfs.info(\"path/that/does/not/exist\")\n\n\n@pytest.mark.parametrize(\n    \"path\",\n    [\n        \"README.md\",\n        \"models/train.py\",\n        \"models/test.py\",\n        \"src/utils/__init__.py\",\n        \"src/utils/serve_model.py\",\n    ],\n)\ndef test_info_git_tracked_file(dvcfs, path):\n    info = dvcfs.info(path)\n\n    assert info[\"repo\"].root_dir == dvcfs.repo.root_dir\n    assert \"dvc_info\" not in info\n    assert info[\"type\"] == \"file\"\n    assert not info[\"isexec\"]\n\n\n@pytest.mark.parametrize(\n    \"path\",\n    [\n        \"data/raw/raw-1.csv\",\n        \"data/raw/raw-2.csv\",\n        
\"data/processed/processed-1.csv\",\n        \"data/processed/processed-2.csv\",\n        \"models/transform.pickle\",\n    ],\n)\ndef test_info_dvc_tracked_file(dvcfs, path):\n    info = dvcfs.info(path)\n\n    assert info[\"repo\"].root_dir == dvcfs.repo.root_dir\n    assert info[\"dvc_info\"][\"isdvc\"]\n    assert info[\"type\"] == \"file\"\n    assert not info[\"isexec\"]\n\n\n@pytest.mark.parametrize(\"path\", [\"src\", \"src/utils\"])\ndef test_info_git_only_dirs(dvcfs, path):\n    info = dvcfs.info(path)\n\n    assert info[\"repo\"].root_dir == dvcfs.repo.root_dir\n    assert \"dvc_info\" not in info\n    assert info[\"type\"] == \"directory\"\n    assert not info[\"isexec\"]\n\n\n@pytest.mark.parametrize(\"path\", [\".\", \"models\"])\ndef test_info_git_dvc_mixed_dirs(dvcfs, path):\n    info = dvcfs.info(path)\n\n    assert info[\"repo\"].root_dir == dvcfs.repo.root_dir\n    assert not info[\"dvc_info\"][\"isdvc\"]\n    assert info[\"type\"] == \"directory\"\n    assert not info[\"isexec\"]\n\n\n@pytest.mark.parametrize(\"path\", [\"data\", \"data/raw\", \"data/processed\"])\ndef test_info_dvc_only_dirs(dvcfs, path):\n    info = dvcfs.info(path)\n\n    assert info[\"repo\"].root_dir == dvcfs.repo.root_dir\n    assert info[\"dvc_info\"][\"isdvc\"]\n    assert info[\"type\"] == \"directory\"\n    assert not info[\"isexec\"]\n\n\ndef test_info_on_subrepos(make_tmp_dir, tmp_dir, dvc, scm, dvcfs):\n    subrepo = tmp_dir / \"subrepo\"\n    make_subrepo(subrepo, scm)\n    with subrepo.chdir():\n        subrepo.scm_gen(\"foo\", \"foo\", commit=\"add foo on subrepo\")\n        subrepo.dvc_gen(\"foobar\", \"foobar\", commit=\"add foobar on subrepo\")\n\n    for path in [\"subrepo\", \"subrepo/foo\", \"subrepo/foobar\"]:\n        info = dvcfs.info(path)\n        assert info[\"repo\"].root_dir == str(subrepo), (\n            f\"repo root didn't match for {path}\"\n        )\n"
  },
  {
    "path": "tests/unit/fs/test_dvcfs.py",
    "content": "import os\nimport posixpath\nfrom hashlib import md5\nfrom itertools import product\n\nimport pytest\nfrom fsspec.implementations.local import LocalFileSystem, make_path_posix\nfrom fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS\n\nfrom dvc.api import DVCFileSystem\n\n\nclass DVCFixtures:\n    \"\"\"The fixtures imitate the fsspec.tests.abstract.AbstractFixtures.\n\n    This has been modified to use dvc's fixture, as DVCFileSystem is a read-only\n    filesystem, and cannot be used to create directories or files.\n\n    The `Output.ignore()` is mocked to avoid `.gitignore` files in the directories,\n    as we can reuse the tests from fsspec with minimal modifications.\n    `.gitignore` file is manually created with required patterns at the root of the\n    repository.\n    \"\"\"\n\n    @pytest.fixture\n    def fs_bulk_operations_scenario_0(self, tmp_dir):\n        \"\"\"\n        Scenario that is used for many cp/get/put tests. Creates the following\n        directory and file structure:\n\n        📁 source\n        ├── 📄 file1\n        ├── 📄 file2\n        └── 📁 subdir\n            ├── 📄 subfile1\n            ├── 📄 subfile2\n            └── 📁 nesteddir\n                └── 📄 nestedfile\n        \"\"\"\n        source = tmp_dir / \"source\"\n        source.mkdir()\n        tmp_dir.scm_gen(\n            \".gitignore\", \"/source/file2/\\nsource/subdir\", commit=\"add .gitignore\"\n        )\n\n        tmp_dir.scm_gen(\"source/file1\", \"file1\", commit=\"add file1\")\n        tmp_dir.dvc_gen(\"source/file2\", \"file2\", commit=\"add file2\")\n        tmp_dir.dvc_gen(\n            {\n                \"source/subdir\": {\n                    \"subfile1\": \"subfile1\",\n                    \"subfile2\": \"subfile2\",\n                    \"nesteddir\": {\"nestedfile\": \"nestedfile\"},\n                }\n            },\n            commit=\"add subdir\",\n        )\n        return \"/source\"\n\n    @pytest.fixture\n    def 
fs_10_files_with_hashed_names(self, tmp_dir, local_fs, local_join, local_path):\n        \"\"\"\n        Scenario that is used to check cp/get/put files order when source and\n        destination are lists. Creates the following directory and file structure:\n\n        📁 source\n        └── 📄 {hashed([0-9])}.txt\n        \"\"\"\n        dir_contents = {\n            md5(str(i).encode(\"utf-8\"), usedforsecurity=False).hexdigest()\n            + \".txt\": str(i)\n            for i in range(10)\n        }\n        tmp_dir.dvc_gen({\"source\": dir_contents}, commit=\"add source\")\n        tmp_dir.scm_gen(\".gitignore\", \"/source\", commit=\"add .gitignore\")\n        return \"/source\"\n\n    @pytest.fixture\n    def src_directory(self, tmp_dir):\n        # https://github.com/fsspec/filesystem_spec/issues/1062\n        # Recursive cp/get/put of source directory into non-existent target directory.\n        tmp_dir.dvc_gen({\"src\": {\"file\": \"file\"}}, commit=\"add source\")\n        return \"/src\"\n\n    @pytest.fixture\n    def fs_dir_and_file_with_same_name_prefix(self, tmp_dir):\n        \"\"\"\n        Scenario that is used to check cp/get/put on directory and file with\n        the same name prefixes. 
Creates the following directory and file structure:\n\n        📁 source\n        ├── 📄 subdir.txt\n        └── 📁 subdir\n            └── 📄 subfile.txt\n        \"\"\"\n        source = tmp_dir / \"source\"\n        source.mkdir()\n\n        tmp_dir.scm_gen(\".gitignore\", \"/source/subdir\", commit=\"add .gitignore\")\n        tmp_dir.scm_gen(\"source/subdir.txt\", \"subdir.txt\", commit=\"add subdir.txt\")\n        tmp_dir.dvc_gen(\n            {\"source/subdir\": {\"subfile.txt\": \"subfile.txt\"}}, commit=\"add subdir\"\n        )\n        return \"/source\"\n\n    @pytest.fixture\n    def fs_glob_edge_cases_files(self, tmp_dir):\n        \"\"\"\n        Scenario that is used for glob edge cases cp/get/put tests.\n        Creates the following directory and file structure:\n\n        📁 source\n        ├── 📄 file1\n        ├── 📄 file2\n        ├── 📁 subdir0\n        │   ├── 📄 subfile1\n        │   ├── 📄 subfile2\n        │   └── 📁 nesteddir\n        │       └── 📄 nestedfile\n        └── 📁 subdir1\n            ├── 📄 subfile1\n            ├── 📄 subfile2\n            └── 📁 nesteddir\n                └── 📄 nestedfile\n        \"\"\"\n        source = tmp_dir / \"source\"\n        source.mkdir()\n\n        tmp_dir.scm_gen(\n            \".gitignore\", \"/source/file1\\n/source/subdir1\", commit=\"add .gitignore\"\n        )\n        tmp_dir.scm_gen(\"source/file1\", \"file1\", commit=\"add file1\")\n        tmp_dir.dvc_gen(\"source/file2\", \"file2\", commit=\"add file2\")\n\n        dir_contents = {\n            \"subfile1\": \"subfile1\",\n            \"subfile2\": \"subfile2\",\n            \"nesteddir\": {\"nestedfile\": \"nestedfile\"},\n        }\n        tmp_dir.scm_gen({\"source/subdir0\": dir_contents}, commit=\"add subdir0\")\n        tmp_dir.dvc_gen({\"source/subdir1\": dir_contents}, commit=\"add subdir1\")\n        return \"/source\"\n\n    @pytest.fixture(params=[{\"rev\": \"HEAD\"}, {}])\n    def fs(self, request, tmp_dir, dvc, scm):\n        return 
DVCFileSystem(tmp_dir, **request.param)\n\n    @pytest.fixture(autouse=True)\n    def mock_ignore(self, mocker):\n        mocker.patch(\"dvc.output.Output.ignore\")\n\n    @pytest.fixture\n    def fs_join(self):\n        return posixpath.join\n\n    @pytest.fixture\n    def fs_path(self, fs):\n        return fs.root_marker\n\n    @pytest.fixture(scope=\"class\")\n    def local_fs(self):\n        # Maybe need an option for auto_mkdir=False?  This is only relevant\n        # for certain implementations.\n        return LocalFileSystem(auto_mkdir=True)\n\n    @pytest.fixture\n    def local_join(self):\n        \"\"\"\n        Return a function that joins its arguments together into a path, on\n        the local filesystem.\n        \"\"\"\n        return os.path.join\n\n    @pytest.fixture\n    def local_path(self, tmpdir):\n        return tmpdir\n\n    @pytest.fixture\n    def local_target(self, local_fs, local_join, local_path):\n        \"\"\"\n        Return name of local directory that does not yet exist to copy into.\n\n        Cleans up at the end of each test it which it is used.\n        \"\"\"\n        target = local_join(local_path, \"target\")\n        yield target\n        if local_fs.exists(target):\n            local_fs.rm(target, recursive=True)\n\n\nclass TestDVCFileSystemGet(DVCFixtures):\n    \"\"\"\n    This test is adapted from `fsspec.tests.abstract.get.AbstractGetTests`\n    with minor modifications to work with DVCFixtures and DVCFileSystem.\n    \"\"\"\n\n    def test_get_file_to_existing_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1a\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n        assert local_fs.isdir(target)\n\n        target_file2 = local_join(target, \"file2\")\n        target_subfile1 = local_join(target, 
\"subfile1\")\n\n        # Copy from source directory\n        fs.get(fs_join(source, \"file2\"), target)\n        assert local_fs.isfile(target_file2)\n\n        # Copy from sub directory\n        fs.get(fs_join(source, \"subdir\", \"subfile1\"), target)\n        assert local_fs.isfile(target_subfile1)\n\n        # Remove copied files\n        local_fs.rm([target_file2, target_subfile1])\n        assert not local_fs.exists(target_file2)\n        assert not local_fs.exists(target_subfile1)\n\n        # Repeat with trailing slash on target\n        fs.get(fs_join(source, \"file2\"), target + \"/\")\n        assert local_fs.isdir(target)\n        assert local_fs.isfile(target_file2)\n\n        fs.get(fs_join(source, \"subdir\", \"subfile1\"), target + \"/\")\n        assert local_fs.isfile(target_subfile1)\n\n    def test_get_file_to_new_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1b\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        fs.get(\n            fs_join(source, \"subdir\", \"subfile1\"), local_join(target, \"newdir/\")\n        )  # Note trailing slash\n\n        assert local_fs.isdir(target)\n        assert local_fs.isdir(local_join(target, \"newdir\"))\n        assert local_fs.isfile(local_join(target, \"newdir\", \"subfile1\"))\n\n    def test_get_file_to_file_in_existing_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1c\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        fs.get(fs_join(source, \"subdir\", \"subfile1\"), local_join(target, \"newfile\"))\n        assert local_fs.isfile(local_join(target, \"newfile\"))\n\n    
def test_get_file_to_file_in_new_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1d\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        fs.get(\n            fs_join(source, \"subdir\", \"subfile1\"),\n            local_join(target, \"newdir\", \"newfile\"),\n        )\n        assert local_fs.isdir(local_join(target, \"newdir\"))\n        assert local_fs.isfile(local_join(target, \"newdir\", \"newfile\"))\n\n    def test_get_directory_to_existing_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1e\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n        assert local_fs.isdir(target)\n\n        for source_slash, target_slash in zip([False, True], [False, True]):\n            s = fs_join(source, \"subdir\")\n            if source_slash:\n                s += \"/\"\n            t = target + \"/\" if target_slash else target\n\n            # Without recursive does nothing\n            fs.get(s, t)\n            assert local_fs.ls(target) == []\n\n            # With recursive\n            fs.get(s, t, recursive=True)\n            if source_slash:\n                assert local_fs.isfile(local_join(target, \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"subfile2\"))\n                assert local_fs.isdir(local_join(target, \"nesteddir\"))\n                assert local_fs.isfile(local_join(target, \"nesteddir\", \"nestedfile\"))\n                assert not local_fs.exists(local_join(target, \"subdir\"))\n\n                local_fs.rm(\n                    [\n                        local_join(target, \"subfile1\"),\n             
           local_join(target, \"subfile2\"),\n                        local_join(target, \"nesteddir\"),\n                    ],\n                    recursive=True,\n                )\n            else:\n                assert local_fs.isdir(local_join(target, \"subdir\"))\n                assert local_fs.isfile(local_join(target, \"subdir\", \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"subdir\", \"subfile2\"))\n                assert local_fs.isdir(local_join(target, \"subdir\", \"nesteddir\"))\n                assert local_fs.isfile(\n                    local_join(target, \"subdir\", \"nesteddir\", \"nestedfile\")\n                )\n\n                local_fs.rm(local_join(target, \"subdir\"), recursive=True)\n            assert local_fs.ls(target) == []\n\n            # Limit recursive by maxdepth\n            fs.get(s, t, recursive=True, maxdepth=1)\n            if source_slash:\n                assert local_fs.isfile(local_join(target, \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"subfile2\"))\n                assert not local_fs.exists(local_join(target, \"nesteddir\"))\n                assert not local_fs.exists(local_join(target, \"subdir\"))\n\n                local_fs.rm(\n                    [\n                        local_join(target, \"subfile1\"),\n                        local_join(target, \"subfile2\"),\n                    ],\n                    recursive=True,\n                )\n            else:\n                assert local_fs.isdir(local_join(target, \"subdir\"))\n                assert local_fs.isfile(local_join(target, \"subdir\", \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"subdir\", \"subfile2\"))\n                assert not local_fs.exists(local_join(target, \"subdir\", \"nesteddir\"))\n\n                local_fs.rm(local_join(target, \"subdir\"), recursive=True)\n            assert local_fs.ls(target) == []\n\n    def 
test_get_directory_to_new_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1f\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        for source_slash, target_slash in zip([False, True], [False, True]):\n            s = fs_join(source, \"subdir\")\n            if source_slash:\n                s += \"/\"\n            t = local_join(target, \"newdir\")\n            if target_slash:\n                t += \"/\"\n\n            # Without recursive does nothing\n            fs.get(s, t)\n            assert local_fs.ls(target) == []\n\n            # With recursive\n            fs.get(s, t, recursive=True)\n            assert local_fs.isdir(local_join(target, \"newdir\"))\n            assert local_fs.isfile(local_join(target, \"newdir\", \"subfile1\"))\n            assert local_fs.isfile(local_join(target, \"newdir\", \"subfile2\"))\n            assert local_fs.isdir(local_join(target, \"newdir\", \"nesteddir\"))\n            assert local_fs.isfile(\n                local_join(target, \"newdir\", \"nesteddir\", \"nestedfile\")\n            )\n            assert not local_fs.exists(local_join(target, \"subdir\"))\n\n            local_fs.rm(local_join(target, \"newdir\"), recursive=True)\n            assert local_fs.ls(target) == []\n\n            # Limit recursive by maxdepth\n            fs.get(s, t, recursive=True, maxdepth=1)\n            assert local_fs.isdir(local_join(target, \"newdir\"))\n            assert local_fs.isfile(local_join(target, \"newdir\", \"subfile1\"))\n            assert local_fs.isfile(local_join(target, \"newdir\", \"subfile2\"))\n            assert not local_fs.exists(local_join(target, \"newdir\", \"nesteddir\"))\n            assert not local_fs.exists(local_join(target, \"subdir\"))\n\n            local_fs.rm(local_join(target, 
\"newdir\"), recursive=True)\n            assert not local_fs.exists(local_join(target, \"newdir\"))\n\n    def test_get_glob_to_existing_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1g\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        for target_slash in [False, True]:\n            t = target + \"/\" if target_slash else target\n\n            # Without recursive\n            fs.get(fs_join(source, \"subdir\", \"*\"), t)\n            assert local_fs.isfile(local_join(target, \"subfile1\"))\n            assert local_fs.isfile(local_join(target, \"subfile2\"))\n            assert not local_fs.isdir(local_join(target, \"nesteddir\"))\n            assert not local_fs.exists(local_join(target, \"nesteddir\", \"nestedfile\"))\n            assert not local_fs.exists(local_join(target, \"subdir\"))\n\n            local_fs.rm(\n                [\n                    local_join(target, \"subfile1\"),\n                    local_join(target, \"subfile2\"),\n                ],\n                recursive=True,\n            )\n            assert local_fs.ls(target) == []\n\n            # With recursive\n            for glob, recursive in zip([\"*\", \"**\"], [True, False]):\n                fs.get(fs_join(source, \"subdir\", glob), t, recursive=recursive)\n                assert local_fs.isfile(local_join(target, \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"subfile2\"))\n                assert local_fs.isdir(local_join(target, \"nesteddir\"))\n                assert local_fs.isfile(local_join(target, \"nesteddir\", \"nestedfile\"))\n                assert not local_fs.exists(local_join(target, \"subdir\"))\n\n                local_fs.rm(\n                    [\n                        local_join(target, \"subfile1\"),\n             
           local_join(target, \"subfile2\"),\n                        local_join(target, \"nesteddir\"),\n                    ],\n                    recursive=True,\n                )\n                assert local_fs.ls(target) == []\n\n                # Limit recursive by maxdepth\n                fs.get(\n                    fs_join(source, \"subdir\", glob), t, recursive=recursive, maxdepth=1\n                )\n                assert local_fs.isfile(local_join(target, \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"subfile2\"))\n                assert not local_fs.exists(local_join(target, \"nesteddir\"))\n                assert not local_fs.exists(local_join(target, \"subdir\"))\n\n                local_fs.rm(\n                    [\n                        local_join(target, \"subfile1\"),\n                        local_join(target, \"subfile2\"),\n                    ],\n                    recursive=True,\n                )\n                assert local_fs.ls(target) == []\n\n    def test_get_glob_to_new_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1h\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        for target_slash in [False, True]:\n            t = fs_join(target, \"newdir\")\n            if target_slash:\n                t += \"/\"\n\n            # Without recursive\n            fs.get(fs_join(source, \"subdir\", \"*\"), t)\n            assert local_fs.isdir(local_join(target, \"newdir\"))\n            assert local_fs.isfile(local_join(target, \"newdir\", \"subfile1\"))\n            assert local_fs.isfile(local_join(target, \"newdir\", \"subfile2\"))\n            assert not local_fs.exists(local_join(target, \"newdir\", \"nesteddir\"))\n            assert not local_fs.exists(\n                
local_join(target, \"newdir\", \"nesteddir\", \"nestedfile\")\n            )\n            assert not local_fs.exists(local_join(target, \"subdir\"))\n            assert not local_fs.exists(local_join(target, \"newdir\", \"subdir\"))\n\n            local_fs.rm(local_join(target, \"newdir\"), recursive=True)\n            assert local_fs.ls(target) == []\n\n            # With recursive\n            for glob, recursive in zip([\"*\", \"**\"], [True, False]):\n                fs.get(fs_join(source, \"subdir\", glob), t, recursive=recursive)\n                assert local_fs.isdir(local_join(target, \"newdir\"))\n                assert local_fs.isfile(local_join(target, \"newdir\", \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"newdir\", \"subfile2\"))\n                assert local_fs.isdir(local_join(target, \"newdir\", \"nesteddir\"))\n                assert local_fs.isfile(\n                    local_join(target, \"newdir\", \"nesteddir\", \"nestedfile\")\n                )\n                assert not local_fs.exists(local_join(target, \"subdir\"))\n                assert not local_fs.exists(local_join(target, \"newdir\", \"subdir\"))\n\n                local_fs.rm(local_join(target, \"newdir\"), recursive=True)\n                assert not local_fs.exists(local_join(target, \"newdir\"))\n\n                # Limit recursive by maxdepth\n                fs.get(\n                    fs_join(source, \"subdir\", glob), t, recursive=recursive, maxdepth=1\n                )\n                assert local_fs.isdir(local_join(target, \"newdir\"))\n                assert local_fs.isfile(local_join(target, \"newdir\", \"subfile1\"))\n                assert local_fs.isfile(local_join(target, \"newdir\", \"subfile2\"))\n                assert not local_fs.exists(local_join(target, \"newdir\", \"nesteddir\"))\n                assert not local_fs.exists(local_join(target, \"subdir\"))\n                assert not local_fs.exists(local_join(target, 
\"newdir\", \"subdir\"))\n\n                local_fs.rm(local_fs.ls(target, detail=False), recursive=True)\n                assert not local_fs.exists(local_join(target, \"newdir\"))\n\n    @pytest.mark.parametrize(\n        GLOB_EDGE_CASES_TESTS[\"argnames\"],\n        GLOB_EDGE_CASES_TESTS[\"argvalues\"],\n    )\n    def test_get_glob_edge_cases(\n        self,\n        path,\n        recursive,\n        maxdepth,\n        expected,\n        fs,\n        fs_join,\n        fs_glob_edge_cases_files,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 1g\n        source = fs_glob_edge_cases_files\n\n        target = local_target\n\n        for new_dir, target_slash in product([True, False], [True, False]):\n            local_fs.mkdir(target)\n\n            t = local_join(target, \"newdir\") if new_dir else target\n            t = t + \"/\" if target_slash else t\n\n            fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)\n\n            output = local_fs.find(target)\n            if new_dir:\n                prefixed_expected = [\n                    make_path_posix(local_join(target, \"newdir\", p)) for p in expected\n                ]\n            else:\n                prefixed_expected = [\n                    make_path_posix(local_join(target, p)) for p in expected\n                ]\n            assert sorted(output) == sorted(prefixed_expected)\n\n            try:\n                local_fs.rm(target, recursive=True)\n            except FileNotFoundError:\n                pass\n\n    def test_get_list_of_files_to_existing_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 2a\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        source_files = [\n            fs_join(source, 
\"file1\"),\n            fs_join(source, \"file2\"),\n            fs_join(source, \"subdir\", \"subfile1\"),\n        ]\n\n        for target_slash in [False, True]:\n            t = target + \"/\" if target_slash else target\n\n            fs.get(source_files, t)\n            assert local_fs.isfile(local_join(target, \"file1\"))\n            assert local_fs.isfile(local_join(target, \"file2\"))\n            assert local_fs.isfile(local_join(target, \"subfile1\"))\n\n            local_fs.rm(\n                [\n                    local_join(target, \"file1\"),\n                    local_join(target, \"file2\"),\n                    local_join(target, \"subfile1\"),\n                ],\n                recursive=True,\n            )\n            assert local_fs.ls(target) == []\n\n    def test_get_list_of_files_to_new_directory(\n        self,\n        fs,\n        fs_join,\n        fs_bulk_operations_scenario_0,\n        local_fs,\n        local_join,\n        local_target,\n    ):\n        # Copy scenario 2b\n        source = fs_bulk_operations_scenario_0\n\n        target = local_target\n        local_fs.mkdir(target)\n\n        source_files = [\n            fs_join(source, \"file1\"),\n            fs_join(source, \"file2\"),\n            fs_join(source, \"subdir\", \"subfile1\"),\n        ]\n\n        fs.get(source_files, local_join(target, \"newdir\") + \"/\")  # Note trailing slash\n        assert local_fs.isdir(local_join(target, \"newdir\"))\n        assert local_fs.isfile(local_join(target, \"newdir\", \"file1\"))\n        assert local_fs.isfile(local_join(target, \"newdir\", \"file2\"))\n        assert local_fs.isfile(local_join(target, \"newdir\", \"subfile1\"))\n\n    def test_get_directory_recursive(\n        self, src_directory, fs, fs_join, fs_path, local_fs, local_join, local_target\n    ):\n        target = local_target\n        src = src_directory\n\n        # get without slash\n        assert not local_fs.exists(target)\n        for loop in 
range(2):\n            fs.get(src, target, recursive=True)\n            assert local_fs.isdir(target)\n\n            if loop == 0:\n                assert local_fs.isfile(local_join(target, \"file\"))\n                assert not local_fs.exists(local_join(target, \"src\"))\n            else:\n                assert local_fs.isfile(local_join(target, \"file\"))\n                assert local_fs.isdir(local_join(target, \"src\"))\n                assert local_fs.isfile(local_join(target, \"src\", \"file\"))\n\n        local_fs.rm(target, recursive=True)\n\n        # get with slash\n        assert not local_fs.exists(target)\n        for _ in range(2):\n            fs.get(src + \"/\", target, recursive=True)\n            assert local_fs.isdir(target)\n            assert local_fs.isfile(local_join(target, \"file\"))\n            assert not local_fs.exists(local_join(target, \"src\"))\n\n    def test_get_directory_without_files_with_same_name_prefix(\n        self,\n        fs,\n        fs_join,\n        local_fs,\n        local_join,\n        local_target,\n        fs_dir_and_file_with_same_name_prefix,\n    ):\n        # Create the test dirs\n        source = fs_dir_and_file_with_same_name_prefix\n        target = local_target\n\n        # Test without glob\n        fs.get(fs_join(source, \"subdir\"), target, recursive=True)\n\n        assert local_fs.isfile(local_join(target, \"subfile.txt\"))\n        assert not local_fs.isfile(local_join(target, \"subdir.txt\"))\n\n        local_fs.rm([local_join(target, \"subfile.txt\")])\n        assert local_fs.ls(target) == []\n\n        # Test with glob\n        fs.get(fs_join(source, \"subdir*\"), target, recursive=True)\n\n        assert local_fs.isdir(local_join(target, \"subdir\"))\n        assert local_fs.isfile(local_join(target, \"subdir\", \"subfile.txt\"))\n        assert local_fs.isfile(local_join(target, \"subdir.txt\"))\n\n    def test_get_with_source_and_destination_as_list(\n        self,\n        fs,\n        
fs_join,\n        local_fs,\n        local_join,\n        local_target,\n        fs_10_files_with_hashed_names,\n    ):\n        # Create the test dir\n        source = fs_10_files_with_hashed_names\n        target = local_target\n\n        # Create list of files for source and destination\n        source_files = []\n        destination_files = []\n        for i in range(10):\n            hashed_i = md5(str(i).encode(\"utf-8\"), usedforsecurity=False).hexdigest()\n            source_files.append(fs_join(source, f\"{hashed_i}.txt\"))\n            destination_files.append(\n                make_path_posix(local_join(target, f\"{hashed_i}.txt\"))\n            )\n\n        # Copy and assert order was kept\n        fs.get(rpath=source_files, lpath=destination_files)\n\n        for i in range(10):\n            file_content = local_fs.cat(destination_files[i]).decode(\"utf-8\")\n            assert file_content == str(i)\n\n\ndef test_maxdepth(tmp_dir, dvc, scm):\n    tmp_dir.dvc_gen(\n        {\n            \"dir\": {\n                \"file1\": \"file1\",\n                \"subdir\": {\n                    \"file2\": \"file2\",\n                    \"subdir2\": {\"file3\": \"file3\", \"subdir3\": {\"file4\": \"file4\"}},\n                },\n            }\n        },\n        commit=\"add dir\",\n    )\n\n    fs = DVCFileSystem(tmp_dir)\n    fs.get(\"dir\", \"dir1\", recursive=True, maxdepth=1)\n    assert (tmp_dir / \"dir1\").read_text() == {\"file1\": \"file1\"}\n\n    fs.get(\"dir\", \"dir2\", recursive=True, maxdepth=2)\n    assert (tmp_dir / \"dir2\").read_text() == {\n        \"file1\": \"file1\",\n        \"subdir\": {\"file2\": \"file2\"},\n    }\n\n    fs.get(\"dir\", \"dir3\", recursive=True, maxdepth=3)\n    assert (tmp_dir / \"dir3\").read_text() == {\n        \"file1\": \"file1\",\n        \"subdir\": {\"file2\": \"file2\", \"subdir2\": {\"file3\": \"file3\"}},\n    }\n\n    fs.get(\"dir\", \"dir4\", recursive=True, maxdepth=4)\n    assert (tmp_dir / 
\"dir4\").read_text() == {\n        \"file1\": \"file1\",\n        \"subdir\": {\n            \"file2\": \"file2\",\n            \"subdir2\": {\"file3\": \"file3\", \"subdir3\": {\"file4\": \"file4\"}},\n        },\n    }\n\n\n@pytest.mark.parametrize(\n    \"fs_args\",\n    [\n        lambda tmp_dir, dvc: ((), {}),  # noqa: ARG005\n        lambda tmp_dir, dvc: ((dvc,), {}),  # noqa: ARG005\n        lambda tmp_dir, dvc: ((tmp_dir,), {}),  # noqa: ARG005\n        lambda tmp_dir, dvc: ((str(tmp_dir),), {}),  # noqa: ARG005\n        lambda tmp_dir, dvc: ((), {\"repo\": tmp_dir}),  # noqa: ARG005\n        lambda tmp_dir, dvc: ((), {\"repo\": os.fspath(tmp_dir)}),  # noqa: ARG005\n        # url= is deprecated, but is still supported for backward compatibility\n        lambda tmp_dir, dvc: ((), {\"url\": tmp_dir}),  # noqa: ARG005\n        lambda tmp_dir, dvc: ((), {\"url\": os.fspath(tmp_dir)}),  # noqa: ARG005\n        lambda tmp_dir, dvc: ((), {\"repo\": dvc}),  # noqa: ARG005\n    ],\n)\ndef test_init_arg(tmp_dir, dvc, fs_args):\n    args, kwargs = fs_args(tmp_dir, dvc)\n    fs = DVCFileSystem(*args, **kwargs)\n\n    assert fs.repo.root_dir == dvc.root_dir\n"
  },
  {
    "path": "tests/unit/fs/test_fs.py",
    "content": "import pytest\n\nfrom dvc.config import RemoteNotFoundError\nfrom dvc.fs import LocalFileSystem, get_cloud_fs, get_fs_cls, get_fs_config\nfrom dvc_http import HTTPFileSystem, HTTPSFileSystem\nfrom dvc_s3 import S3FileSystem\nfrom dvc_ssh import SSHFileSystem\n\nurl_cls_pairs = [\n    (\"s3://bucket/path\", S3FileSystem),\n    (\"ssh://example.com:/dir/path\", SSHFileSystem),\n    (\"http://example.com/path/to/file\", HTTPFileSystem),\n    (\"https://example.com/path/to/file\", HTTPSFileSystem),\n    (\"path/to/file\", LocalFileSystem),\n    (\"path\\\\to\\\\file\", LocalFileSystem),\n    (\"file\", LocalFileSystem),\n    (\"./file\", LocalFileSystem),\n    (\".\\\\file\", LocalFileSystem),\n    (\"../file\", LocalFileSystem),\n    (\"..\\\\file\", LocalFileSystem),\n    (\"unknown://path\", LocalFileSystem),\n]\n\n\ntry:\n    from dvc_hdfs import HDFSFileSystem\n\n    url_cls_pairs += [(\"hdfs://example.com/dir/path\", HDFSFileSystem)]\nexcept ImportError:\n    pass\n\n\n@pytest.mark.parametrize(\"url, cls\", url_cls_pairs)\ndef test_get_fs_cls(url, cls):\n    assert get_fs_cls({\"url\": url}) == cls\n\n\ndef test_get_fs_config():\n    result = get_fs_config({}, url=\"ssh://example.com:/dir/path\")\n    assert result == {\"url\": \"ssh://example.com:/dir/path\"}\n\n\ndef test_get_fs_config_error():\n    with pytest.raises(RemoteNotFoundError):\n        get_fs_config({\"remote\": {}}, name=\"myremote\")\n\n\ndef test_remote_url():\n    config = {\n        \"remote\": {\n            \"base\": {\"url\": \"http://example.com\"},\n            \"r1\": {\"url\": \"remote://base/r1\", \"user\": \"user\"},\n            \"r2\": {\"url\": \"remote://r1/r2\", \"password\": \"123\"},\n        }\n    }\n    result = get_fs_config(config, url=\"remote://r2/foo\")\n    assert result == {\n        \"password\": \"123\",\n        \"user\": \"user\",\n        \"url\": \"http://example.com/r1/r2/foo\",\n    }\n\n\ndef test_get_cloud_fs():\n    cls, config, path = 
get_cloud_fs({}, url=\"ssh://example.com:/dir/path\")\n    assert cls is SSHFileSystem\n    assert config == {\"host\": \"example.com\", \"verify\": False}\n    assert path == \"/dir/path\"\n"
  },
  {
    "path": "tests/unit/fs/test_tree.py",
    "content": "import pytest\n\nfrom dvc.config import ConfigError\nfrom dvc.fs import get_cloud_fs\n\n\ndef test_get_cloud_fs(tmp_dir, dvc):\n    tmp_dir.add_remote(name=\"base\", url=\"s3://bucket/path\", default=False)\n    tmp_dir.add_remote(name=\"first\", url=\"remote://base/first\", default=False)\n    tmp_dir.add_remote(name=\"second\", url=\"remote://first/second\", default=False)\n\n    base = \"bucket/path\"\n    first = f\"{base}/first\"\n    second = f\"{first}/second\"\n\n    _, _, path = get_cloud_fs(dvc.config, name=\"base\")\n    assert path == base\n    _, _, path = get_cloud_fs(dvc.config, name=\"first\")\n    assert path == first\n    _, _, path = get_cloud_fs(dvc.config, name=\"second\")\n    assert path == second\n\n\ndef test_get_cloud_fs_validate(tmp_dir, dvc):\n    tmp_dir.add_remote(name=\"base\", url=\"ssh://example.com/path\", default=False)\n    tmp_dir.add_remote(\n        name=\"first\",\n        config={\"url\": \"remote://base/first\", \"type\": \"symlink\"},\n        default=False,\n    )\n    tmp_dir.add_remote(\n        name=\"second\",\n        config={\"url\": \"remote://first/second\", \"oss_key_id\": \"mykey\"},\n        default=False,\n    )\n\n    assert get_cloud_fs(dvc.config, name=\"base\")[1][\"host\"] == \"example.com\"\n    assert get_cloud_fs(dvc.config, name=\"first\")[1][\"host\"] == \"example.com\"\n    assert get_cloud_fs(dvc.config, name=\"first\")[1][\"type\"] == [\"symlink\"]\n\n    with pytest.raises(ConfigError):\n        get_cloud_fs(dvc.config, name=\"second\")\n"
  },
  {
    "path": "tests/unit/output/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/output/test_annotations.py",
    "content": "import pytest\n\nfrom dvc.annotations import Annotation\n\n\n@pytest.mark.parametrize(\n    \"kwargs\",\n    [\n        {\"desc\": \"desc\", \"type\": \"type\", \"labels\": [\"label1\", \"label2\"]},\n        {\"desc\": \"desc\", \"type\": \"type\", \"meta\": {\"key\": \"value\"}},\n    ],\n)\ndef test_annotation_to_dict(kwargs):\n    annot = Annotation(**kwargs)\n    assert annot.to_dict() == kwargs\n"
  },
  {
    "path": "tests/unit/output/test_load.py",
    "content": "import pytest\n\nfrom dvc import output\nfrom dvc.fs import LocalFileSystem\nfrom dvc.output import Output\nfrom dvc.stage import Stage\nfrom dvc_s3 import S3FileSystem\n\n\n@pytest.mark.parametrize(\n    \"out_type,type_test_func\",\n    [\n        (\"outs\", lambda o: not (o.metric or o.plot)),\n        (\"metrics\", lambda o: o.metric and not o.plot),\n        (\"plots\", lambda o: o.plot and not o.metric),\n    ],\n    ids=(\"outs\", \"metrics\", \"plots\"),\n)\ndef test_load_from_pipeline(dvc, out_type, type_test_func):\n    outs = output.load_from_pipeline(\n        Stage(dvc),\n        [\n            \"file1\",\n            \"file2\",\n            {\"file3\": {\"cache\": True}},\n            {},\n            {\"file4\": {\"cache\": False}},\n            {\"file5\": {\"persist\": False}},\n            {\"file6\": {\"persist\": True, \"cache\": False}},\n        ],\n        out_type,\n    )\n    cached_outs = {\"file1\", \"file2\", \"file3\", \"file5\"}\n    persisted_outs = {\"file6\"}\n    assert len(outs) == 6\n\n    for i, out in enumerate(outs, start=1):\n        assert isinstance(out, Output)\n        assert isinstance(out.fs, LocalFileSystem)\n        assert out.def_path == f\"file{i}\"\n        assert out.use_cache == (out.def_path in cached_outs)\n        assert out.persist == (out.def_path in persisted_outs)\n        assert not out.hash_info\n        assert type_test_func(out)\n\n\ndef test_load_from_pipeline_accumulates_flag(dvc):\n    outs = output.load_from_pipeline(\n        Stage(dvc),\n        [\n            \"file1\",\n            {\"file2\": {\"cache\": False}},\n            {\"file1\": {\"persist\": False}},\n            {\"file2\": {\"persist\": True}},\n        ],\n        \"outs\",\n    )\n    for out in outs:\n        assert isinstance(out, Output)\n        assert isinstance(out.fs, LocalFileSystem)\n        assert not out.plot\n        assert not out.metric\n        assert not out.hash_info\n\n    assert 
outs[0].use_cache\n    assert not outs[0].persist\n    assert not outs[1].use_cache\n    assert outs[1].persist\n\n\ndef test_load_remote_files_from_pipeline(dvc):\n    stage = Stage(dvc)\n    (out,) = output.load_from_pipeline(\n        stage, [{\"s3://dvc-test/file.txt\": {\"cache\": False}}], typ=\"metrics\"\n    )\n    assert isinstance(out, Output)\n    assert isinstance(out.fs, S3FileSystem)\n    assert not out.plot\n    assert out.metric\n    assert not out.persist\n    assert not out.hash_info\n\n\ndef test_load_remote(dvc):\n    stage = Stage(dvc)\n    (foo, bar) = output.load_from_pipeline(\n        stage,\n        [\"foo\", {\"bar\": {\"remote\": \"myremote\"}}],\n    )\n    assert foo.remote is None\n    assert bar.remote == \"myremote\"\n\n\n@pytest.mark.parametrize(\"typ\", [None, \"\", \"illegal\"])\ndef test_load_from_pipeline_error_on_typ(dvc, typ):\n    with pytest.raises(\n        ValueError, match=f\"'{typ}' key is not allowed for pipeline files.\"\n    ):\n        output.load_from_pipeline(Stage(dvc), [\"file1\"], typ)\n\n\n@pytest.mark.parametrize(\"key\", [3, [\"list\"]])\ndef test_load_from_pipeline_illegal_type(dvc, key):\n    stage = Stage(dvc)\n    with pytest.raises(ValueError, match=f\"'{type(key).__name__}' not supported.\"):\n        output.load_from_pipeline(stage, [key], \"outs\")\n    with pytest.raises(\n        ValueError,\n        match=f\"Expected dict for 'key', got: '{type(key).__name__}'\",\n    ):\n        output.load_from_pipeline(stage, [{\"key\": key}], \"outs\")\n\n\ndef test_plots_load_from_pipeline(dvc):\n    outs = output.load_from_pipeline(\n        Stage(dvc),\n        [\n            \"file1\",\n            {\n                \"file2\": {\n                    \"persist\": True,\n                    \"cache\": False,\n                    \"x\": 3,\n                    \"random\": \"val\",\n                }\n            },\n        ],\n        \"plots\",\n    )\n    assert isinstance(outs[0], Output)\n    assert 
isinstance(outs[0].fs, LocalFileSystem)\n    assert outs[0].use_cache\n    assert outs[0].plot is True\n    assert not outs[0].metric\n    assert not outs[0].persist\n\n    assert isinstance(outs[1], Output)\n    assert isinstance(outs[1].fs, LocalFileSystem)\n    assert not outs[1].use_cache\n    assert outs[1].plot == {\"x\": 3}\n    assert not outs[1].metric\n    assert outs[1].persist\n"
  },
  {
    "path": "tests/unit/output/test_local.py",
    "content": "import os\n\nfrom dvc.output import Output\nfrom dvc.stage import Stage\nfrom dvc_data.hashfile.hash_info import HashInfo\nfrom dvc_data.hashfile.meta import Meta\n\n\ndef test_str_workdir_outside_repo(tmp_dir, erepo_dir):\n    stage = Stage(erepo_dir.dvc)\n    output = Output(stage, \"path\", cache=False)\n\n    assert os.path.abspath(\"path\") == str(output)\n\n\ndef test_str_workdir_inside_repo(dvc):\n    stage = Stage(dvc)\n    output = Output(stage, \"path\", cache=False)\n\n    assert str(output) == \"path\"\n\n    stage = Stage(dvc, wdir=\"some_folder\")\n    output = Output(stage, \"path\", cache=False)\n\n    assert os.path.join(\"some_folder\", \"path\") == str(output)\n\n\ndef test_str_on_local_absolute_path(dvc):\n    stage = Stage(dvc)\n\n    rel_path = os.path.join(\"path\", \"to\", \"file\")\n    abs_path = os.path.abspath(rel_path)\n    output = Output(stage, abs_path, cache=False)\n\n    assert output.def_path == rel_path\n    assert output.fs_path == abs_path\n    assert str(output) == rel_path\n\n\ndef test_str_on_external_absolute_path(dvc):\n    stage = Stage(dvc)\n\n    rel_path = os.path.join(\"..\", \"path\", \"to\", \"file\")\n    abs_path = os.path.abspath(rel_path)\n    output = Output(stage, abs_path, cache=False)\n\n    assert output.def_path == abs_path\n    assert output.fs_path == abs_path\n    assert str(output) == abs_path\n\n\ndef test_return_0_on_no_cache(dvc):\n    o = Output(Stage(dvc), \"path\")\n    o.use_cache = False\n    assert o.get_files_number() == 0\n\n\ndef test_return_multiple_for_dir(dvc):\n    o = Output(Stage(dvc), \"path\")\n    o.hash_info = HashInfo(\"md5\", \"12345678.dir\")\n    o.meta = Meta(nfiles=2)\n    assert o.get_files_number() == 2\n\n\ndef test_return_1_on_single_file_cache(mocker, dvc):\n    mocker.patch.object(Output, \"is_dir_checksum\", False)\n    o = Output(Stage(dvc), \"path\")\n    o.hash_info = HashInfo(\"md5\", \"12345678\")\n    assert o.get_files_number() == 1\n"
  },
  {
    "path": "tests/unit/output/test_output.py",
    "content": "import logging\nimport os\n\nimport pytest\nfrom funcy import first\nfrom voluptuous import MultipleInvalid, Schema\n\nfrom dvc.fs import RemoteMissingDepsError\nfrom dvc.ignore import CheckIgnoreResult\nfrom dvc.output import CHECKSUM_SCHEMA, Output\nfrom dvc.stage import Stage\nfrom dvc.utils.fs import remove\n\n\ndef test_save_missing(dvc, mocker):\n    stage = Stage(dvc)\n    out = Output(stage, \"path\", cache=False)\n    mocker.patch.object(out.fs, \"exists\", return_value=False)\n    with pytest.raises(out.DoesNotExistError):\n        out.save()\n\n\n@pytest.mark.parametrize(\n    \"value,expected\",\n    [\n        (\"\", None),\n        (None, None),\n        (11111, \"11111\"),\n        (\"11111\", \"11111\"),\n        (\"aAaBa\", \"aaaba\"),\n        (\n            \"3cc286c534a71504476da009ed174423\",\n            \"3cc286c534a71504476da009ed174423\",\n        ),  # md5\n        (\n            \"d41d8cd98f00b204e9800998ecf8427e-38\",\n            \"d41d8cd98f00b204e9800998ecf8427e-38\",\n        ),  # etag\n        (\n            \"000002000000000000000000c16859d1d071c6b1ffc9c8557d4909f1\",\n            \"000002000000000000000000c16859d1d071c6b1ffc9c8557d4909f1\",\n        ),  # hdfs checksum\n        # Not much we can do about hex and oct values without writing our own\n        # parser. 
So listing these test cases just to acknowledge this.\n        # See https://github.com/treeverse/dvc/issues/3331.\n        (0x3451, \"13393\"),\n        (0o1244, \"676\"),\n    ],\n)\ndef test_checksum_schema(value, expected):\n    assert Schema(CHECKSUM_SCHEMA)(value) == expected\n\n\n@pytest.mark.parametrize(\"value\", [\"1\", \"11\", {}, {\"a\": \"b\"}, [], [1, 2]])\ndef test_checksum_schema_fail(value):\n    with pytest.raises(MultipleInvalid):\n        assert Schema(CHECKSUM_SCHEMA)(value)\n\n\n@pytest.mark.parametrize(\n    \"exists, expected_message\",\n    [\n        (\n            False,\n            (\n                \"Output 'path'(stage: 'stage.dvc') is missing version info. \"\n                \"Cache for it will not be collected. \"\n                \"Use `dvc repro` to get your pipeline up to date.\"\n            ),\n        ),\n        (\n            True,\n            (\n                \"Output 'path'(stage: 'stage.dvc') is missing version info. \"\n                \"Cache for it will not be collected. 
\"\n                \"Use `dvc repro` to get your pipeline up to date.\\n\"\n                \"You can also use `dvc commit stage.dvc` to associate \"\n                \"existing 'path' with stage: 'stage.dvc'.\"\n            ),\n        ),\n    ],\n)\ndef test_get_used_objs(exists, expected_message, mocker, caplog):\n    stage = mocker.MagicMock()\n    mocker.patch.object(stage, \"__str__\", return_value=\"stage: 'stage.dvc'\")\n    mocker.patch.object(stage, \"addressing\", \"stage.dvc\")\n    mocker.patch.object(stage, \"wdir\", os.getcwd())\n    mocker.patch.object(stage.repo, \"root_dir\", os.getcwd())\n    mocker.patch.object(stage.repo.dvcignore, \"is_ignored\", return_value=False)\n    mocker.patch.object(\n        stage.repo.dvcignore,\n        \"check_ignore\",\n        return_value=CheckIgnoreResult(\"path\", False, []),\n    )\n    stage.repo.fs.version_aware = False\n    stage.repo.fs.PARAM_CHECKSUM = \"md5\"\n\n    output = Output(stage, \"path\")\n\n    mocker.patch.object(output, \"use_cache\", True)\n    mocker.patch.object(stage, \"is_repo_import\", False)\n    mocker.patch.object(\n        Output, \"exists\", new_callable=mocker.PropertyMock\n    ).return_value = exists\n\n    caplog.clear()\n    with caplog.at_level(logging.WARNING, logger=\"dvc\"):\n        assert output.get_used_objs() == {}\n    assert first(caplog.messages) == expected_message\n\n\ndef test_remote_missing_dependency_on_dir_pull(tmp_dir, scm, dvc, mocker):\n    tmp_dir.dvc_gen({\"dir\": {\"subfile\": \"file2 content\"}}, commit=\"add dir\")\n    with dvc.config.edit() as conf:\n        conf[\"remote\"][\"s3\"] = {\"url\": \"s3://bucket/name\"}\n        conf[\"core\"] = {\"remote\": \"s3\"}\n\n    remove(\"dir\")\n    remove(dvc.cache.local.path)\n\n    mocker.patch(\n        \"dvc.data_cloud.DataCloud.get_remote\",\n        side_effect=RemoteMissingDepsError(dvc.fs, \"azure\", \"azure://\", []),\n    )\n    with pytest.raises(RemoteMissingDepsError):\n        
dvc.pull()\n\n\ndef test_hash_info_cloud_versioning_dir(mocker):\n    stage = mocker.MagicMock()\n    stage.repo.fs.version_aware = False\n    stage.repo.fs.PARAM_CHECKSUM = \"etag\"\n    files = [\n        {\n            \"size\": 3,\n            \"version_id\": \"WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h\",\n            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"relpath\": \"bar\",\n        },\n        {\n            \"size\": 3,\n            \"version_id\": \"0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0\",\n            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"relpath\": \"foo\",\n        },\n    ]\n    out = Output(stage, \"path\", files=files)\n    # `hash_info`` and `meta`` constructed from `files``\n    assert out.hash_info.name == \"md5\"\n    assert out.hash_info.value == \"77e8000f532886eef8ee1feba82e9bad.dir\"\n    assert out.meta.isdir\n    assert out.meta.nfiles == 2\n    assert out.meta.size == 6\n\n\ndef test_dumpd_cloud_versioning_dir(mocker):\n    stage = mocker.MagicMock()\n    stage.repo.fs.version_aware = False\n    stage.repo.fs.PARAM_CHECKSUM = \"md5\"\n    files = [\n        {\n            \"size\": 3,\n            \"version_id\": \"WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h\",\n            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"relpath\": \"bar\",\n        },\n        {\n            \"size\": 3,\n            \"version_id\": \"0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0\",\n            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"relpath\": \"foo\",\n        },\n    ]\n    out = Output(stage, \"path\", files=files)\n\n    dumpd = out.dumpd()\n    assert dumpd == {\"path\": \"path\", \"hash\": \"md5\", \"files\": files}\n\n\ndef 
test_version_aware_is_set_based_on_files(mocker):\n    import dvc.fs as dvc_fs\n\n    get_fs_config = mocker.spy(dvc_fs, \"get_fs_config\")\n\n    stage = mocker.MagicMock()\n    stage.repo.fs.version_aware = False\n    stage.repo.fs.PARAM_CHECKSUM = \"etag\"\n    files = [\n        {\n            \"size\": 3,\n            \"version_id\": \"WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h\",\n            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"relpath\": \"bar\",\n        }\n    ]\n    Output(stage, \"path\", files=files)\n    # version_aware is passed as `True` if `files` is present`.\n    # This will be intentionally ignored in filesystems that don't handle it\n    # in `_prepare_credentials`.\n    assert get_fs_config.call_args_list[0][1] == {\"url\": \"path\", \"version_aware\": True}\n"
  },
  {
    "path": "tests/unit/remote/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/remote/test_oss.py",
    "content": "from dvc_oss import OSSFileSystem\n\nbucket_name = \"bucket-name\"\nendpoint = \"endpoint\"\nkey_id = \"Fq2UVErCz4I6tq\"\nkey_secret = \"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsu\"\n\n\ndef test_init(dvc):\n    prefix = \"some/prefix\"\n    url = f\"oss://{bucket_name}/{prefix}\"\n    config = {\n        \"url\": url,\n        \"oss_key_id\": key_id,\n        \"oss_key_secret\": key_secret,\n        \"oss_endpoint\": endpoint,\n    }\n    fs = OSSFileSystem(**config)\n    assert fs.fs_args[\"endpoint\"] == endpoint\n    assert fs.fs_args[\"key\"] == key_id\n    assert fs.fs_args[\"secret\"] == key_secret\n"
  },
  {
    "path": "tests/unit/remote/test_remote.py",
    "content": "import pytest\n\nfrom dvc.fs import get_cloud_fs\nfrom dvc_gs import GSFileSystem\nfrom dvc_s3 import S3FileSystem\n\n\ndef test_remote_with_hash_jobs(dvc):\n    dvc.config[\"remote\"][\"with_hash_jobs\"] = {\n        \"url\": \"s3://bucket/name\",\n        \"checksum_jobs\": 100,\n    }\n    dvc.config[\"core\"][\"checksum_jobs\"] = 200\n\n    cls, config, _ = get_cloud_fs(dvc.config, name=\"with_hash_jobs\")\n    fs = cls(**config)\n    assert fs.hash_jobs == 100\n\n\ndef test_remote_with_jobs(dvc):\n    dvc.config[\"remote\"][\"with_jobs\"] = {\"url\": \"s3://bucket/name\", \"jobs\": 100}\n\n    cls, config, _ = get_cloud_fs(dvc.config, name=\"with_jobs\")\n    fs = cls(**config)\n    assert fs.jobs == 100\n\n\ndef test_remote_without_hash_jobs(dvc):\n    dvc.config[\"remote\"][\"without_hash_jobs\"] = {\"url\": \"s3://bucket/name\"}\n    dvc.config[\"core\"][\"checksum_jobs\"] = 200\n\n    cls, config, _ = get_cloud_fs(dvc.config, name=\"without_hash_jobs\")\n    fs = cls(**config)\n    assert fs.hash_jobs == 200\n\n\ndef test_remote_without_hash_jobs_default(dvc):\n    dvc.config[\"remote\"][\"without_hash_jobs\"] = {\"url\": \"s3://bucket/name\"}\n\n    cls, config, _ = get_cloud_fs(dvc.config, name=\"without_hash_jobs\")\n    fs = cls(**config)\n    assert fs.hash_jobs == fs.HASH_JOBS\n\n\n@pytest.mark.parametrize(\"fs_cls\", [GSFileSystem, S3FileSystem])\ndef test_makedirs_not_create_for_top_level_path(fs_cls, dvc, mocker):\n    url = f\"{fs_cls.protocol}://bucket/\"\n    fs = fs_cls(url=url)\n    mocked_client = mocker.PropertyMock()\n    mocker.patch.object(fs_cls, \"fs\", mocked_client)\n\n    fs.makedirs(url)\n    assert not mocked_client.called\n"
  },
  {
    "path": "tests/unit/remote/test_webdav.py",
    "content": "import pytest\n\nfrom dvc.fs import get_cloud_fs\nfrom dvc_webdav import WebDAVFileSystem, WebDAVSFileSystem\nfrom tests.utils.asserts import issubset\n\nurl_fmt = \"{scheme}://{user}@example.com/public.php/webdav\"\nurl = \"webdav://example.com/public.php/webdav\"\nuser = \"username\"\npassword = \"password\"\ntoken = \"4MgjsNM5aSJjxIKM\"\ncustom_auth_header = \"Custom-Header\"\n\n\ndef test_common():\n    fs = WebDAVFileSystem(\n        url=url,\n        cert_path=\"cert/path\",\n        key_path=\"key/path\",\n        ssl_verify=\"bundle.pem\",\n        timeout=10,\n        prefix=\"/public.php/webdav\",\n        user=None,\n        password=None,\n        ask_password=False,\n        token=None,\n        custom_auth_header=None,\n    )\n    assert issubset(\n        {\n            \"headers\": {},\n            \"auth\": None,\n            \"base_url\": url,\n            \"cert\": (\"cert/path\", \"key/path\"),\n            \"verify\": \"bundle.pem\",\n            \"timeout\": 10,\n        },\n        fs.fs_args,\n    )\n    assert fs.prefix == \"/public.php/webdav\"\n\n\ndef test_user():\n    fs = WebDAVFileSystem(url=url, user=user)\n    assert issubset({\"auth\": (user, None), \"headers\": {}}, fs.fs_args)\n\n\ndef test_password():\n    config = {\"url\": url, \"user\": user, \"password\": password}\n    fs = WebDAVFileSystem(**config)\n    assert issubset(\n        {\n            \"headers\": {},\n            \"auth\": (user, password),\n        },\n        fs.fs_args,\n    )\n\n\ndef test_token():\n    config = {\"token\": token, \"url\": url}\n    fs = WebDAVFileSystem(**config)\n    assert issubset(\n        {\"headers\": {\"Authorization\": f\"Bearer {token}\"}, \"auth\": None},\n        fs.fs_args,\n    )\n\n\ndef test_ask_password(mocker):\n    ask_password_mocked = mocker.patch(\"dvc_webdav.ask_password\", return_value=\"pass\")\n    host = \"host\"\n\n    # it should not ask for password as password is set\n    config = {\n        
\"url\": url,\n        \"user\": user,\n        \"password\": password,\n        \"ask_password\": True,\n        \"host\": host,\n    }\n    fs = WebDAVFileSystem(**config)\n    assert issubset({\"auth\": (user, password), \"headers\": {}}, fs.fs_args)\n\n    config.pop(\"password\")\n    fs = WebDAVFileSystem(**config)\n    assert issubset({\"auth\": (user, \"pass\"), \"headers\": {}}, fs.fs_args)\n    ask_password_mocked.assert_called_once_with(host, user)\n\n\ndef test_custom_auth_header():\n    config = {\n        \"url\": url,\n        \"custom_auth_header\": custom_auth_header,\n        \"password\": password,\n    }\n    fs = WebDAVFileSystem(**config)\n    assert issubset(\n        {\"headers\": {custom_auth_header: password}, \"auth\": None},\n        fs.fs_args,\n    )\n\n\ndef test_ask_password_custom_auth_header(mocker):\n    ask_password_mocked = mocker.patch(\"dvc_webdav.ask_password\", return_value=\"pass\")\n    host = \"host\"\n\n    # it should not ask for password as password is set\n    config = {\n        \"url\": url,\n        \"custom_auth_header\": custom_auth_header,\n        \"password\": password,\n        \"ask_password\": True,\n        \"host\": host,\n    }\n    fs = WebDAVFileSystem(**config)\n    assert issubset(\n        {\"headers\": {custom_auth_header: password}, \"auth\": None}, fs.fs_args\n    )\n\n    config.pop(\"password\")\n    fs = WebDAVFileSystem(**config)\n    assert issubset({\"headers\": {custom_auth_header: \"pass\"}, \"auth\": None}, fs.fs_args)\n    ask_password_mocked.assert_called_once_with(host, custom_auth_header)\n\n\ndef test_ssl_verify_custom_cert():\n    config = {\"url\": url, \"ssl_verify\": \"/path/to/custom/cabundle.pem\"}\n\n    fs = WebDAVFileSystem(**config)\n    assert fs.fs_args[\"verify\"] == \"/path/to/custom/cabundle.pem\"\n\n\n@pytest.mark.parametrize(\n    \"base_url, fs_cls\",\n    [\n        (url_fmt.format(scheme=\"webdav\", user=user), WebDAVFileSystem),\n        
(url_fmt.format(scheme=\"webdavs\", user=user), WebDAVSFileSystem),\n    ],\n)\ndef test_remote_with_jobs(dvc, base_url, fs_cls):\n    scheme = \"http\" + (\"s\" if fs_cls is WebDAVSFileSystem else \"\")\n    remote_config = {\"url\": base_url}\n\n    dvc.config[\"remote\"][\"dav\"] = remote_config\n    cls, config, _ = get_cloud_fs(dvc.config, name=\"dav\")\n    assert config[\"user\"] == user\n    assert f\"{scheme}://{user}@example.com\" in config[\"host\"]\n    assert cls is fs_cls\n\n    # config from remote takes priority\n    remote_config.update({\"user\": \"admin\"})\n    cls, config, _ = get_cloud_fs(dvc.config, name=\"dav\")\n    assert config[\"user\"] == \"admin\"\n    assert f\"{scheme}://{user}@example.com\" in config[\"host\"]\n    assert cls is fs_cls\n\n\ndef test_bearer_token_command(mocker):\n    mock_auth_obj = mocker.Mock()\n    mock_get_auth = mocker.patch(\n        \"dvc_webdav.get_bearer_auth\", return_value=mock_auth_obj\n    )\n\n    command = \"my-token-cmd.sh\"\n    timeout = 10\n\n    # Configuration with potential conflicts\n    config = {\n        \"url\": url,\n        \"token\": \"static-token-value\",\n        \"user\": \"conflict-user\",\n        \"password\": \"conflict-password\",\n        \"bearer_token_command\": command,\n        \"timeout\": timeout,\n    }\n\n    fs = WebDAVFileSystem(**config)\n\n    # the factory function was invoked with the correct arguments from the config.\n    mock_get_auth.assert_called_once_with(command, timeout)\n\n    # the generated auth object is correctly injected into `fs_args`.\n    assert fs.fs_args[\"auth\"] == mock_auth_obj\n"
  },
  {
    "path": "tests/unit/remote/test_webhdfs.py",
    "content": "import pytest\nimport requests\n\nfrom dvc_webhdfs import WebHDFSFileSystem\n\nhost = \"host\"\nkerberos = False\nkerberos_principal = \"principal\"\nport = 12345\nproxy_to = \"proxy\"\nssl_verify = False\ntoken = \"token\"\nuse_https = True\nuser = \"test\"\n\n\n@pytest.fixture(name=\"webhdfs_config\")\ndef fixture_webhdfs_config():\n    url = f\"webhdfs://{user}@{host}:{port}\"\n    url_config = WebHDFSFileSystem._get_kwargs_from_urls(url)\n    return {\n        \"kerberos\": kerberos,\n        \"kerberos_principal\": kerberos_principal,\n        \"proxy_to\": proxy_to,\n        \"ssl_verify\": ssl_verify,\n        \"token\": token,\n        \"use_https\": use_https,\n        **url_config,\n    }\n\n\ndef test_init(dvc, webhdfs_config):\n    fs = WebHDFSFileSystem(**webhdfs_config)\n    assert fs.fs_args[\"host\"] == host\n    assert fs.fs_args[\"token\"] == token\n    assert fs.fs_args[\"user\"] == user\n    assert fs.fs_args[\"port\"] == port\n    assert fs.fs_args[\"kerberos\"] == kerberos\n    assert fs.fs_args[\"kerb_kwargs\"] == {\"principal\": kerberos_principal}\n    assert fs.fs_args[\"proxy_to\"] == proxy_to\n    assert fs.fs_args[\"use_https\"] == use_https\n\n\ndef test_verify_ssl(dvc, webhdfs_config, monkeypatch, mocker):\n    mock_session = mocker.create_autospec(requests.Session)\n    monkeypatch.setattr(requests, \"Session\", mocker.Mock(return_value=mock_session))\n    # can't have token at the same time as user or proxy_to\n    del webhdfs_config[\"token\"]\n    fs = WebHDFSFileSystem(**webhdfs_config)\n    # ssl verify can't be set until after the file system is instantiated\n    assert fs.fs\n    assert mock_session.verify == ssl_verify\n"
  },
  {
    "path": "tests/unit/render/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/render/test_convert.py",
    "content": "import json\n\nimport pytest\n\nfrom dvc.render import ANCHOR_DEFINITIONS, FILENAME, REVISION, REVISIONS, SRC, TYPE_KEY\nfrom dvc.render.convert import to_json\n\n\ndef test_to_json_vega(mocker):\n    vega_renderer = mocker.MagicMock()\n    vega_renderer.TYPE = \"vega\"\n    vega_renderer.get_revs.return_value = [\"bar\", \"foo\"]\n    vega_renderer.get_filled_template.return_value = {\"this\": \"is vega\"}\n    result = to_json(vega_renderer)\n    assert result[0] == {\n        TYPE_KEY: vega_renderer.TYPE,\n        REVISIONS: [\"bar\", \"foo\"],\n        \"content\": {\"this\": \"is vega\"},\n    }\n    vega_renderer.get_filled_template.assert_called()\n\n\n@pytest.mark.vscode\ndef test_to_json_vega_split(mocker):\n    revs = [\"bar\", \"foo\"]\n    content = json.dumps(\n        {\n            \"this\": \"is split vega\",\n            \"encoding\": {\"color\": \"<DVC_METRIC_COLOR>\"},\n            \"data\": {\"values\": \"<DVC_METRIC_DATA>\"},\n        }\n    )\n    anchor_definitions = {\n        \"<DVC_METRIC_COLOR>\": {\n            \"field\": \"rev\",\n            \"scale\": {\n                \"domain\": revs,\n                \"range\": [\"#ff0000\", \"#00ff00\"],\n            },\n        },\n        \"<DVC_METRIC_DATA>\": [\n            {\n                \"x\": 1,\n                \"y\": 2,\n                REVISION: \"foo\",\n                FILENAME: \"foo.json\",\n            },\n            {\n                \"x\": 2,\n                \"y\": 1,\n                REVISION: \"bar\",\n                FILENAME: \"foo.json\",\n            },\n        ],\n    }\n\n    vega_renderer = mocker.MagicMock()\n    vega_renderer.TYPE = \"vega\"\n    vega_renderer.get_partial_filled_template.return_value = (\n        content,\n        {ANCHOR_DEFINITIONS: anchor_definitions},\n    )\n    vega_renderer.get_revs.return_value = [\"bar\", \"foo\"]\n\n    result = to_json(vega_renderer, split=True)\n    assert result[0] == {\n        ANCHOR_DEFINITIONS: 
anchor_definitions,\n        TYPE_KEY: vega_renderer.TYPE,\n        REVISIONS: revs,\n        \"content\": content,\n    }\n    vega_renderer.get_partial_filled_template.assert_called_once()\n\n\ndef test_to_json_image(mocker):\n    image_renderer = mocker.MagicMock()\n    image_renderer.TYPE = \"image\"\n    image_renderer.datapoints = [\n        {SRC: \"contentfoo\", REVISION: \"foo\"},\n        {SRC: \"contentbar\", REVISION: \"bar\"},\n    ]\n    result = to_json(image_renderer)\n    assert result[0] == {\n        \"url\": image_renderer.datapoints[0].get(SRC),\n        REVISIONS: [image_renderer.datapoints[0].get(REVISION)],\n        TYPE_KEY: image_renderer.TYPE,\n    }\n"
  },
  {
    "path": "tests/unit/render/test_image_converter.py",
    "content": "from dvc.render import FILENAME, REVISION, SRC\nfrom dvc.render.converter.image import ImageConverter\n\n\ndef test_image_converter_no_out():\n    data = {\"image.png\": b\"content\"}\n    converter = ImageConverter(\"image.png\", data)\n    datapoints, _ = converter.flat_datapoints(\"r\")\n\n    assert datapoints[0] == {\n        REVISION: \"r\",\n        FILENAME: \"image.png\",\n        SRC: converter._encode_image(b\"content\"),\n    }\n\n\ndef test_image_converter_with_out(tmp_dir):\n    data = {\"image.png\": b\"content\"}\n    converter = ImageConverter(\"image.png\", data, {\"out\": tmp_dir / \"foo\"})\n\n    datapoints, _ = converter.flat_datapoints(\"r\")\n\n    assert datapoints[0] == {\n        REVISION: \"r\",\n        FILENAME: \"image.png\",\n        SRC: str(tmp_dir / \"foo\" / \"r_image.png\"),\n    }\n\n    assert (tmp_dir / \"foo\" / \"r_image.png\").read_bytes() == b\"content\"\n\n\ndef test_image_converter_with_slash_in_revision(tmp_dir):\n    \"\"\"Regression test for #7934\"\"\"\n    data = {\"image.png\": b\"content\"}\n    converter = ImageConverter(\"image.png\", data, {\"out\": tmp_dir / \"foo\"})\n\n    datapoints, _ = converter.flat_datapoints(\"feature/r\")\n\n    assert datapoints[0] == {\n        REVISION: \"feature/r\",\n        FILENAME: \"image.png\",\n        SRC: str(tmp_dir / \"foo\" / \"feature_r_image.png\"),\n    }\n\n    assert (tmp_dir / \"foo\" / \"feature_r_image.png\").read_bytes() == b\"content\"\n"
  },
  {
    "path": "tests/unit/render/test_match.py",
    "content": "import pytest\nfrom funcy import set_in\n\nfrom dvc.render import FIELD, FILENAME, REVISION\nfrom dvc.render.converter.vega import VegaConverter\nfrom dvc.render.match import PlotsData, _squash_plots_properties, match_defs_renderers\nfrom dvc.testing import matchers as M\n\n\n@pytest.mark.parametrize(\n    \"data,expected\",\n    [\n        pytest.param(\n            {\n                \"v1\": {\n                    \"definitions\": {\n                        \"data\": {\"config_file_1\": {\"data\": {\"plot_id_1\": {}}}}\n                    }\n                }\n            },\n            {\"plot_id_1\": [(\"v1\", \"plot_id_1\", {})]},\n            id=\"simple\",\n        ),\n        pytest.param(\n            {\n                \"v1\": {\n                    \"definitions\": {\n                        \"data\": {\n                            \"config_file_1\": {\"data\": {\"plot_id_1\": {}}},\n                            \"config_file_2\": {\"data\": {\"plot_id_1\": {}}},\n                        }\n                    }\n                }\n            },\n            {\n                \"config_file_1::plot_id_1\": [(\"v1\", \"plot_id_1\", {})],\n                \"config_file_2::plot_id_1\": [(\"v1\", \"plot_id_1\", {})],\n            },\n            id=\"multi_config\",\n        ),\n        pytest.param(\n            {\n                \"v1\": {\n                    \"definitions\": {\n                        \"data\": {\"config_file_1\": {\"data\": {\"plot_id_1\": {}}}}\n                    }\n                },\n                \"v2\": {\n                    \"definitions\": {\n                        \"data\": {\"config_file_2\": {\"data\": {\"plot_id_1\": {}}}}\n                    }\n                },\n            },\n            {\"plot_id_1\": [(\"v1\", \"plot_id_1\", {}), (\"v2\", \"plot_id_1\", {})]},\n            id=\"multi_rev\",\n        ),\n        pytest.param(\n            {\n                \"v1\": {\n                    
\"definitions\": {\n                        \"data\": {\n                            \"config_file_1\": {\"data\": {\"plot_id_1\": {}}},\n                            \"config_file_2\": {\"data\": {\"plot_id_1\": {}}},\n                        }\n                    }\n                },\n                \"v2\": {\n                    \"definitions\": {\n                        \"data\": {\"config_file_1\": {\"data\": {\"plot_id_1\": {}}}}\n                    }\n                },\n            },\n            {\n                \"config_file_1::plot_id_1\": [(\"v1\", \"plot_id_1\", {})],\n                \"config_file_2::plot_id_1\": [(\"v1\", \"plot_id_1\", {})],\n                \"plot_id_1\": [(\"v2\", \"plot_id_1\", {})],\n            },\n            id=\"multi_rev_multi_config\",\n        ),\n        pytest.param(\n            {\n                \"v1\": {\n                    \"definitions\": {\n                        \"data\": {\n                            \"config_file_1\": {\n                                \"data\": {\"plot_id_1\": {}, \"plot_id_2\": {}}\n                            },\n                            \"config_file_2\": {\"data\": {\"plot_id_3\": {}}},\n                        }\n                    }\n                },\n                \"v2\": {\n                    \"definitions\": {\n                        \"data\": {\n                            \"config_file_2\": {\"data\": {\"plot_id_3\": {}}},\n                        }\n                    },\n                    \"source\": {\n                        \"data\": {\n                            \"config_file_1\": {\"error\": FileNotFoundError()},\n                        }\n                    },\n                },\n            },\n            {\n                \"plot_id_1\": [(\"v1\", \"plot_id_1\", {})],\n                \"plot_id_2\": [(\"v1\", \"plot_id_2\", {})],\n                \"plot_id_3\": [\n                    (\"v1\", \"plot_id_3\", {}),\n                    (\"v2\", 
\"plot_id_3\", {}),\n                ],\n            },\n            id=\"all\",\n        ),\n    ],\n)\ndef test_group_definitions(data, expected):\n    grouped = PlotsData(data).group_definitions()\n    assert grouped == expected\n\n\ndef test_match_renderers():\n    data = {\n        \"v1\": {\n            \"definitions\": {\n                \"data\": {\n                    \"config_file_1\": {\n                        \"data\": {\n                            \"plot_id_1\": {\n                                \"x\": \"x\",\n                                \"y\": {\"file.json\": \"y\"},\n                            }\n                        }\n                    }\n                },\n            },\n            \"sources\": {\n                \"data\": {\"file.json\": {\"data\": [{\"x\": 1, \"y\": 1}, {\"x\": 2, \"y\": 2}]}}\n            },\n        },\n        \"revision_with_no_data\": {\n            \"definitions\": {\n                \"data\": {\n                    \"config_file_1\": {\n                        \"data\": {\n                            \"plot_id_1\": {\n                                \"x\": \"x\",\n                                \"y\": {\"file.json\": \"y\"},\n                            }\n                        }\n                    }\n                },\n            },\n            \"sources\": {\"data\": {\"file.json\": {\"error\": FileNotFoundError()}}},\n        },\n    }\n\n    (renderer_with_errors,) = match_defs_renderers(data)\n    renderer = renderer_with_errors[0]\n    assert renderer.datapoints == [\n        {\n            REVISION: \"v1\",\n            FILENAME: \"file.json\",\n            FIELD: \"y\",\n            \"x\": 1,\n            \"y\": 1,\n        },\n        {\n            REVISION: \"v1\",\n            FILENAME: \"file.json\",\n            FIELD: \"y\",\n            \"x\": 2,\n            \"y\": 2,\n        },\n    ]\n    assert renderer.properties == {\n        \"anchors_y_definitions\": [{FILENAME: 
\"file.json\", FIELD: \"y\"}],\n        \"revs_with_datapoints\": [\"v1\"],\n        \"title\": \"plot_id_1\",\n        \"x\": \"x\",\n        \"y\": \"y\",\n        \"x_label\": \"x\",\n        \"y_label\": \"y\",\n    }\n    assert renderer_with_errors.source_errors == {\n        \"revision_with_no_data\": {\"file.json\": M.instance_of(FileNotFoundError)}\n    }\n    assert not renderer_with_errors.definition_errors\n\n\ndef test_flat_datapoints_errors_are_caught(mocker):\n    d = {}\n    d = set_in(\n        d,\n        [\"v1\", \"definitions\", \"data\", \"dvc.yaml\", \"data\", \"plot_id_1\"],\n        {\"x\": \"x\", \"y\": {\"file.json\": \"y\"}},\n    )\n    d = set_in(d, [\"v1\", \"sources\", \"data\", \"file.json\", \"data\"], [{\"x\": 1, \"y\": 1}])\n    mocker.patch.object(VegaConverter, \"flat_datapoints\", side_effect=ValueError)\n    (renderer_with_errors,) = match_defs_renderers(d)\n    assert not renderer_with_errors.source_errors\n    assert renderer_with_errors.definition_errors == {\"v1\": M.instance_of(ValueError)}\n\n\ndef test_squash_plots_properties_revs():\n    group = [\n        (\"v3\", \"config_file\", \"plot_id\", {\"foo\": 1}),\n        (\"v2\", \"config_file\", \"plot_id\", {\"foo\": 2, \"bar\": 2}),\n        (\"v1\", \"config_file\", \"plot_id\", {\"baz\": 3}),\n    ]\n\n    plot_properties = _squash_plots_properties(group)\n\n    assert plot_properties == {\"foo\": 1, \"bar\": 2, \"baz\": 3}\n\n\ndef test_squash_plots_properties_config_files():\n    group = [\n        (\"v1\", \"config_file1\", \"plot_id\", {\"foo\": 1}),\n        (\"v1\", \"config_file2\", \"plot_id\", {\"foo\": 2, \"bar\": 2}),\n        (\"v1\", \"config_file3\", \"plot_id\", {\"baz\": 3}),\n    ]\n\n    plot_properties = _squash_plots_properties(group)\n\n    assert plot_properties == {\"foo\": 1, \"bar\": 2, \"baz\": 3}\n"
  },
  {
    "path": "tests/unit/render/test_vega_converter.py",
    "content": "from collections import OrderedDict\n\nimport pytest\n\nfrom dvc.exceptions import DvcException\nfrom dvc.render import FIELD, FILENAME, REVISION\nfrom dvc.render.converter.vega import FieldNotFoundError, VegaConverter, _lists\n\n\n@pytest.mark.parametrize(\n    \"dictionary, expected_result\",\n    [\n        ({}, []),\n        ({\"x\": [\"a\", \"b\", \"c\"]}, [[\"a\", \"b\", \"c\"]]),\n        (\n            OrderedDict([(\"x\", {\"y\": [\"a\", \"b\"]}), (\"z\", {\"w\": [\"c\", \"d\"]})]),\n            [[\"a\", \"b\"], [\"c\", \"d\"]],\n        ),\n    ],\n)\ndef test_finding_lists(dictionary, expected_result):\n    result = _lists(dictionary)\n\n    assert list(result) == expected_result\n\n\n@pytest.mark.studio\n@pytest.mark.parametrize(\n    \"input_data,properties,expected_datapoints,expected_properties\",\n    [\n        pytest.param(\n            {\"f\": {\"metric\": [{\"v\": 1}, {\"v\": 2}]}},\n            {},\n            [\n                {\n                    \"v\": 1,\n                    \"step\": 0,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n                {\n                    \"v\": 2,\n                    \"step\": 1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [{FILENAME: \"f\", FIELD: \"v\"}],\n                \"x\": \"step\",\n                \"y\": \"v\",\n                \"x_label\": \"step\",\n                \"y_label\": \"v\",\n            },\n            id=\"default_x_y\",\n        ),\n        pytest.param(\n            {\"f\": {\"metric\": [{\"v\": 1, \"v2\": 0.1}, {\"v\": 2, \"v2\": 0.2}]}},\n            {\"x\": \"v\", \"y\": \"v2\"},\n            [\n                {\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    REVISION: 
\"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [{FILENAME: \"f\", FIELD: \"v2\"}],\n                \"x\": \"v\",\n                \"y\": \"v2\",\n                \"x_label\": \"v\",\n                \"y_label\": \"v2\",\n            },\n            id=\"choose_x_y\",\n        ),\n        pytest.param(\n            {\n                \"f\": {\n                    \"some\": \"noise\",\n                    \"very\": {\n                        \"nested\": {\n                            \"metric\": [\n                                {\"v\": 1, \"v2\": 0.1},\n                                {\"v\": 2, \"v2\": 0.2},\n                            ]\n                        }\n                    },\n                }\n            },\n            {\"x\": \"v\", \"y\": \"v2\", \"x_label\": \"x\", \"y_label\": \"y\"},\n            [\n                {\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [{FILENAME: \"f\", FIELD: \"v2\"}],\n                \"x\": \"v\",\n                \"y\": \"v2\",\n                \"x_label\": \"x\",\n                \"y_label\": \"y\",\n            },\n            id=\"find_in_nested_structure\",\n        ),\n        pytest.param(\n            {\"f\": {\"metric\": [{\"v\": 
1, \"v2\": 0.1}, {\"v\": 2, \"v2\": 0.2}]}},\n            {\"y\": {\"f\": [\"v\", \"v2\"]}},\n            [\n                {\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                    \"dvc_inferred_y_value\": 1,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    \"step\": 0,\n                },\n                {\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                    \"dvc_inferred_y_value\": 2,\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    \"step\": 1,\n                },\n                {\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                    \"dvc_inferred_y_value\": 0.1,\n                    \"v2\": 0.1,\n                    \"v\": 1,\n                    \"step\": 0,\n                },\n                {\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    \"dvc_inferred_y_value\": 0.2,\n                    \"step\": 1,\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v\"},\n                    {FILENAME: \"f\", FIELD: \"v2\"},\n                ],\n                \"x\": \"step\",\n                \"y\": \"dvc_inferred_y_value\",\n                \"y_label\": \"y\",\n                \"x_label\": \"step\",\n            },\n            id=\"y_def_list\",\n        ),\n        pytest.param(\n            {\n                \"f\": {\n                    \"metric\": [{\"v\": 1}, {\"v\": 2}],\n                    \"other_metric\": [{\"z\": 3}, {\"z\": 4}],\n                }\n            },\n            {\"y\": {\"f\": [\"v\", 
\"z\"]}},\n            [\n                {\n                    \"dvc_inferred_y_value\": 1,\n                    \"z\": 3,\n                    \"v\": 1,\n                    \"step\": 0,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 2,\n                    \"z\": 4,\n                    \"step\": 1,\n                    \"v\": 2,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 3,\n                    \"v\": 1,\n                    \"z\": 3,\n                    \"step\": 0,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"z\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 4,\n                    \"v\": 2,\n                    \"z\": 4,\n                    \"step\": 1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"z\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v\"},\n                    {FILENAME: \"f\", FIELD: \"z\"},\n                ],\n                \"x\": \"step\",\n                \"y\": \"dvc_inferred_y_value\",\n                \"y_label\": \"y\",\n                \"x_label\": \"step\",\n            },\n            id=\"multi_source_json\",\n        ),\n        pytest.param(\n            {\n                \"f\": {\"metric\": [{\"v\": 1, \"v2\": 0.1}, {\"v\": 2, \"v2\": 0.2}]},\n                \"f2\": {\"metric\": [{\"v\": 3, \"v2\": 0.3}]},\n            },\n            {\"x\": \"v\", \"y\": {\"f\": \"v2\", \"f2\": \"v2\"}},\n            [\n                {\n                    \"v\": 1,\n             
       \"v2\": 0.1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"v\": 3,\n                    \"v2\": 0.3,\n                    REVISION: \"r\",\n                    FILENAME: \"f2\",\n                    FIELD: \"v2\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v2\"},\n                    {FILENAME: \"f2\", FIELD: \"v2\"},\n                ],\n                \"x\": \"v\",\n                \"y\": \"v2\",\n                \"x_label\": \"v\",\n                \"y_label\": \"v2\",\n            },\n            id=\"multi_file_json\",\n        ),\n        pytest.param(\n            {\"f\": {\"metric\": [{\"v\": 1, \"v2\": 0.1}, {\"v\": 2, \"v2\": 0.2}]}},\n            {\"y\": [\"v\", \"v2\"]},\n            [\n                {\n                    \"dvc_inferred_y_value\": 1,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    \"step\": 0,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 2,\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    \"step\": 1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 0.1,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    \"step\": 0,\n                    REVISION: \"r\",\n                    FILENAME: 
\"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 0.2,\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    \"step\": 1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v\"},\n                    {FILENAME: \"f\", FIELD: \"v2\"},\n                ],\n                \"x\": \"step\",\n                \"y\": \"dvc_inferred_y_value\",\n                \"x_label\": \"step\",\n                \"y_label\": \"y\",\n            },\n            id=\"y_list\",\n        ),\n        pytest.param(\n            {\n                \"f\": {\"metric\": [{\"v\": 1, \"v2\": 0.1, \"v3\": 0.01}]},\n                \"f2\": {\"metric\": [{\"v\": 1, \"v2\": 0.1}]},\n            },\n            {\"y\": {\"f\": [\"v2\", \"v3\"], \"f2\": [\"v2\"]}, \"x\": \"v\"},\n            [\n                {\n                    \"dvc_inferred_y_value\": 0.1,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    \"v3\": 0.01,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 0.01,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    \"v3\": 0.01,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v3\",\n                },\n                {\n                    \"dvc_inferred_y_value\": 0.1,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    REVISION: \"r\",\n                    FILENAME: \"f2\",\n                    FIELD: \"v2\",\n                },\n            ],\n            {\n         
       \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v2\"},\n                    {FILENAME: \"f\", FIELD: \"v3\"},\n                    {FILENAME: \"f2\", FIELD: \"v2\"},\n                ],\n                \"x\": \"v\",\n                \"y\": \"dvc_inferred_y_value\",\n                \"x_label\": \"v\",\n                \"y_label\": \"y\",\n            },\n            id=\"multi_source_y_single_x\",\n        ),\n        pytest.param(\n            {\n                \"dir/f\": {\"metric\": [{\"v\": 1, \"v2\": 0.1}]},\n                \"dir/f2\": {\"metric\": [{\"v\": 1, \"v2\": 0.1}]},\n            },\n            {\"y\": {\"dir/f\": [\"v2\"], \"dir/f2\": [\"v2\"]}, \"x\": \"v\"},\n            [\n                {\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    REVISION: \"r\",\n                    FILENAME: \"f2\",\n                    FIELD: \"v2\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v2\"},\n                    {FILENAME: \"f2\", FIELD: \"v2\"},\n                ],\n                \"x\": \"v\",\n                \"y\": \"v2\",\n                \"x_label\": \"v\",\n                \"y_label\": \"v2\",\n            },\n            id=\"multi_file_y_same_prefix\",\n        ),\n        pytest.param(\n            {\n                \"f\": {\"metric\": [{\"x1\": 1, \"v\": 0.1}]},\n                \"f2\": {\"metric\": [{\"x2\": 100, \"v\": 0.1}]},\n            },\n            {\"y\": {\"f\": [\"v\"], \"f2\": [\"v\"]}, \"x\": {\"f\": \"x1\", \"f2\": \"x2\"}},\n            [\n                {\n                    \"x1\": 1,\n                    \"v\": 0.1,\n                    
\"dvc_inferred_x_value\": 1,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n                {\n                    \"x2\": 100,\n                    \"v\": 0.1,\n                    \"dvc_inferred_x_value\": 100,\n                    REVISION: \"r\",\n                    FILENAME: \"f2\",\n                    FIELD: \"v\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v\"},\n                    {FILENAME: \"f2\", FIELD: \"v\"},\n                ],\n                \"x\": \"dvc_inferred_x_value\",\n                \"y\": \"v\",\n                \"x_label\": \"x\",\n                \"y_label\": \"v\",\n            },\n            id=\"multiple_x_fields\",\n        ),\n        pytest.param(\n            {\n                \"f\": {\n                    \"metric\": [\n                        {\"v\": 1, \"v2\": 0.1, \"x1\": 100},\n                        {\"v\": 2, \"v2\": 0.2, \"x1\": 1000},\n                    ]\n                },\n                \"f2\": {\"metric\": [{\"x2\": -2}, {\"x2\": -4}]},\n            },\n            {\"y\": [\"v\", \"v2\"], \"x\": {\"f\": \"x1\", \"f2\": \"x2\"}},\n            [\n                {\n                    \"dvc_inferred_x_value\": 100,\n                    \"dvc_inferred_y_value\": 1,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    \"x1\": 100,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n                },\n                {\n                    \"dvc_inferred_x_value\": 1000,\n                    \"dvc_inferred_y_value\": 2,\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    \"x1\": 1000,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v\",\n         
       },\n                {\n                    \"dvc_inferred_x_value\": -2,\n                    \"dvc_inferred_y_value\": 0.1,\n                    \"v\": 1,\n                    \"v2\": 0.1,\n                    \"x1\": 100,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n                {\n                    \"dvc_inferred_x_value\": -4,\n                    \"dvc_inferred_y_value\": 0.2,\n                    \"v\": 2,\n                    \"v2\": 0.2,\n                    \"x1\": 1000,\n                    REVISION: \"r\",\n                    FILENAME: \"f\",\n                    FIELD: \"v2\",\n                },\n            ],\n            {\n                \"anchors_y_definitions\": [\n                    {FILENAME: \"f\", FIELD: \"v\"},\n                    {FILENAME: \"f\", FIELD: \"v2\"},\n                ],\n                \"x\": \"dvc_inferred_x_value\",\n                \"y\": \"dvc_inferred_y_value\",\n                \"x_label\": \"x\",\n                \"y_label\": \"y\",\n            },\n            id=\"y_list_x_dict\",\n        ),\n    ],\n)\ndef test_convert(input_data, properties, expected_datapoints, expected_properties):\n    converter = VegaConverter(\"f\", input_data, properties)\n    datapoints, resolved_properties = converter.flat_datapoints(\"r\")\n\n    assert datapoints == expected_datapoints\n    assert resolved_properties == expected_properties\n\n\n@pytest.mark.parametrize(\n    \"input_data,properties,exc\",\n    [\n        pytest.param(\n            {\n                \"f\": {\n                    \"metric\": [\n                        {\"v\": 1},\n                        {\"v\": 2},\n                    ]\n                },\n                \"f2\": {\"metric\": [{\"v2\": 0.1}]},\n            },\n            {\"x\": {\"f\": \"v\"}, \"y\": {\"f2\": \"v2\"}},\n            DvcException,\n            id=\"unequal_datapoints\",\n        ),\n  
      pytest.param(\n            {\n                \"f\": {\n                    \"metric\": [\n                        {\"v\": 1, \"v2\": 0.1},\n                        {\"v\": 2, \"v2\": 0.2},\n                    ]\n                },\n                \"f2\": {\n                    \"metric\": [\n                        {\"v\": 3, \"v2\": 0.3},\n                    ]\n                },\n            },\n            {\"x\": {\"f\": \"v\", \"f2\": \"v3\"}, \"y\": {\"f\": \"v2\"}},\n            FieldNotFoundError,\n            id=\"unequal_x_y\",\n        ),\n    ],\n)\ndef test_convert_fail(input_data, properties, exc):\n    converter = VegaConverter(\"f\", input_data, properties)\n    with pytest.raises(exc):\n        converter.flat_datapoints(\"r\")\n\n\n@pytest.mark.parametrize(\n    \"properties,label\",\n    [\n        ({\"x\": {\"actual.csv\": \"actual\"}}, \"actual\"),\n        (\n            {\"x\": {\"train_actual.csv\": \"actual\", \"val_actual.csv\": \"actual\"}},\n            \"actual\",\n        ),\n        (\n            {\"x\": {\"actual.csv\": \"actual\", \"predicted.csv\": \"predicted\"}},\n            \"x\",\n        ),\n    ],\n)\ndef test_infer_x_label(properties, label):\n    assert VegaConverter.infer_x_label(properties) == label\n"
  },
  {
    "path": "tests/unit/repo/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/repo/experiments/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/repo/experiments/conftest.py",
    "content": "from functools import partial\n\nimport pytest\n\nfrom dvc_task.app import FSApp\n\nDEFAULT_ITERATIONS = 2\n\n\n@pytest.fixture\ndef exp_stage(tmp_dir, scm, dvc, copy_script):\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\")\n    stage = dvc.run(\n        cmd=\"python copy.py params.yaml metrics.yaml\",\n        metrics_no_cache=[\"metrics.yaml\"],\n        params=[\"foo\"],\n        name=\"copy-file\",\n        deps=[\"copy.py\"],\n    )\n    scm.add(\n        [\n            \"dvc.yaml\",\n            \"dvc.lock\",\n            \"copy.py\",\n            \"params.yaml\",\n            \"metrics.yaml\",\n            \".gitignore\",\n        ]\n    )\n    scm.commit(\"init\")\n    return stage\n\n\n@pytest.fixture\ndef failed_exp_stage(tmp_dir, scm, dvc, copy_script):\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\")\n    stage = dvc.stage.add(\n        cmd=\"python -c 'import sys; sys.exit(1)'\",\n        metrics_no_cache=[\"failed-metrics.yaml\"],\n        params=[\"foo\"],\n        name=\"failed-copy-file\",\n        deps=[\"copy.py\"],\n    )\n    scm.add(\n        [\n            \"dvc.yaml\",\n            \"dvc.lock\",\n            \"copy.py\",\n            \"params.yaml\",\n            \"failed-metrics.yaml\",\n            \".gitignore\",\n        ]\n    )\n    scm.commit(\"init\")\n    return stage\n\n\ndef _thread_worker(app, **kwargs):\n    # Based on pytest-celery's celery_worker fixture but using thread pool\n    # instead of solo pool so that broadcast/control API is available\n    from celery.contrib.testing import worker\n\n    app.loader.import_task_module(\"celery.contrib.testing.tasks\")\n    return worker.start_worker(app, pool=\"threads\", **kwargs)\n\n\n@pytest.fixture(scope=\"session\")\ndef session_app(tmp_path_factory) -> FSApp:\n    \"\"\"Session scoped experiments queue celery app.\"\"\"\n    from kombu.transport.filesystem import Channel\n\n    # related to https://github.com/treeverse/dvc-task/issues/61\n    
Channel.QoS.restore_at_shutdown = False\n\n    from dvc_task.app import FSApp\n\n    wdir = tmp_path_factory.mktemp(\"dvc-test-celery\")\n    app = FSApp(\n        \"dvc-exp-local\",\n        wdir=wdir,\n        mkdir=True,\n        include=[\"dvc.repo.experiments.queue.tasks\", \"dvc_task.proc.tasks\"],\n    )\n    app.conf.update({\"task_acks_late\": True, \"result_expires\": None})\n    return app\n\n\n@pytest.fixture(scope=\"session\")\ndef session_worker(session_app):\n    \"\"\"Session scoped celery worker that runs in separate thread(s).\"\"\"\n    with _thread_worker(\n        session_app,\n        concurrency=1,\n        ping_task_timeout=20,\n        loglevel=\"DEBUG\",\n    ) as worker:\n        yield worker\n\n\n@pytest.fixture\ndef session_queue(tmp_dir, dvc, scm, mocker, session_app, session_worker):\n    \"\"\"Patches experiments celery queue for pytest testing.\n\n    Uses session-scoped celery worker.\n    \"\"\"\n    queue = dvc.experiments.celery_queue\n    queue.celery = session_app\n    queue.worker = session_worker\n    mocker.patch.object(queue, \"_spawn_worker\")\n    return queue\n\n\n@pytest.fixture\ndef test_queue(tmp_dir, dvc, scm, mocker):\n    \"\"\"Patches experiments celery queue for pytest testing.\n\n    Uses function-scoped celery worker which runs in separate thread(s).\n    \"\"\"\n    import celery\n\n    queue = dvc.experiments.celery_queue\n    mocker.patch.object(queue, \"_spawn_worker\")\n\n    f = partial(_thread_worker, queue.celery, concurrency=1, ping_task_timeout=20)\n    exc = None\n    for _ in range(3):\n        try:\n            with f() as worker:\n                mocker.patch.object(queue, \"worker\", return_value=worker)\n                yield queue\n                return\n        except celery.exceptions.TimeoutError as e:\n            exc = e\n            continue\n    assert exc\n    raise exc\n"
  },
  {
    "path": "tests/unit/repo/experiments/queue/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/repo/experiments/queue/test_celery.py",
    "content": "import time\n\nimport pytest\nfrom celery import shared_task\nfrom celery.result import AsyncResult\n\nfrom dvc.exceptions import DvcException\nfrom dvc.repo.experiments.exceptions import UnresolvedExpNamesError\nfrom dvc.repo.experiments.queue.base import QueueDoneResult\nfrom dvc.repo.experiments.queue.exceptions import CannotKillTasksError\n\n\ndef test_shutdown_no_tasks(test_queue, mocker):\n    shutdown_spy = mocker.spy(test_queue.celery.control, \"shutdown\")\n    test_queue.shutdown()\n    shutdown_spy.assert_called_once()\n\n\n@shared_task\ndef _foo(arg=None):\n    return \"foo\"\n\n\ndef test_shutdown(test_queue, mocker):\n    shutdown_spy = mocker.patch(\"celery.app.control.Control.shutdown\")\n    test_queue.shutdown()\n    shutdown_spy.assert_called_once()\n\n\ndef test_shutdown_with_kill(test_queue, mocker):\n    mock_entry_foo = mocker.Mock(stash_rev=\"af12de\")\n    mock_entry_foo.name = \"foo\"\n    mock_entry_bar = mocker.Mock(stash_rev=\"bar\")\n    mock_entry_bar.name = None\n\n    shutdown_spy = mocker.patch(\"celery.app.control.Control.shutdown\")\n    mocker.patch.object(\n        test_queue,\n        \"iter_active\",\n        return_value=[mock_entry_foo, mock_entry_bar],\n    )\n    kill_spy = mocker.patch.object(test_queue, \"_kill_entries\")\n\n    test_queue.shutdown(kill=True)\n\n    shutdown_spy.assert_called_once()\n    kill_spy.assert_called_once_with(\n        {mock_entry_foo: \"foo\", mock_entry_bar: \"bar\"}, True\n    )\n\n\ndef test_post_run_after_kill(test_queue):\n    from celery import chain\n\n    sig_bar = test_queue.proc.run_signature(\n        [\"python3\", \"-c\", \"import time; time.sleep(10)\"], name=\"bar\"\n    )\n    sig_bar.freeze()\n    sig_foo = _foo.s()\n    result_foo = sig_foo.freeze()\n    run_chain = chain(sig_bar, sig_foo)\n\n    run_chain.delay()\n    timeout = time.time() + 10\n\n    while True:\n        try:\n            test_queue.proc.kill(\"bar\")\n            assert result_foo.status 
== \"PENDING\"\n            break\n        except ProcessLookupError:\n            time.sleep(0.1)\n        if time.time() > timeout:\n            raise TimeoutError\n\n    assert result_foo.get(timeout=10) == \"foo\"\n\n\n@pytest.mark.parametrize(\"force\", [True, False])\ndef test_celery_queue_kill(test_queue, mocker, force):\n    mock_entry_foo = mocker.Mock(stash_rev=\"foo\")\n    mock_entry_bar = mocker.Mock(stash_rev=\"bar\")\n    mock_entry_foobar = mocker.Mock(stash_rev=\"foobar\")\n\n    mocker.patch.object(\n        test_queue,\n        \"iter_active\",\n        return_value={mock_entry_foo, mock_entry_bar, mock_entry_foobar},\n    )\n    mocker.patch.object(\n        test_queue,\n        \"match_queue_entry_by_name\",\n        return_value={\n            \"bar\": mock_entry_bar,\n            \"foo\": mock_entry_foo,\n            \"foobar\": mock_entry_foobar,\n        },\n    )\n    mocker.patch.object(\n        test_queue,\n        \"_get_running_task_ids\",\n        return_value={\"foo\", \"foobar\"},\n    )\n    mocker.patch.object(\n        test_queue,\n        \"_iter_processed\",\n        return_value=[\n            (mocker.Mock(headers={\"id\": \"foo\"}), mock_entry_foo),\n            (mocker.Mock(headers={\"id\": \"bar\"}), mock_entry_bar),\n            (mocker.Mock(headers={\"id\": \"foobar\"}), mock_entry_foobar),\n        ],\n    )\n    mocker.patch.object(AsyncResult, \"ready\", return_value=False)\n    mark_mocker = mocker.patch.object(test_queue.celery.backend, \"mark_as_failure\")\n\n    def kill_function(rev):\n        if rev == \"foo\":\n            return True\n        raise ProcessLookupError\n\n    kill_mock = mocker.patch.object(\n        test_queue.proc,\n        \"kill\" if force else \"interrupt\",\n        side_effect=mocker.MagicMock(side_effect=kill_function),\n    )\n    with pytest.raises(CannotKillTasksError, match=\"Task 'foobar' is initializing,\"):\n        test_queue.kill([\"bar\", \"foo\", \"foobar\"], force=force)\n    
assert kill_mock.call_args_list == [\n        mocker.call(mock_entry_bar.stash_rev),\n        mocker.call(mock_entry_foo.stash_rev),\n        mocker.call(mock_entry_foobar.stash_rev),\n    ]\n    mark_mocker.assert_called_once_with(\"bar\", None)\n\n\n@pytest.mark.parametrize(\"force\", [True, False])\ndef test_celery_queue_kill_invalid(test_queue, mocker, force):\n    mock_entry_foo = mocker.Mock(stash_rev=\"foo\")\n    mock_entry_bar = mocker.Mock(stash_rev=\"bar\")\n\n    mocker.patch.object(\n        test_queue,\n        \"match_queue_entry_by_name\",\n        return_value={\"bar\": mock_entry_bar, \"foo\": mock_entry_foo, \"foobar\": None},\n    )\n\n    kill_mock = mocker.patch.object(test_queue, \"_kill_entries\")\n\n    with pytest.raises(UnresolvedExpNamesError):\n        test_queue.kill([\"bar\", \"foo\", \"foobar\"], force=force)\n    kill_mock.assert_called_once_with(\n        {mock_entry_foo: \"foo\", mock_entry_bar: \"bar\"}, force\n    )\n\n\n@pytest.mark.parametrize(\"status\", [\"FAILURE\", \"SUCCESS\"])\ndef test_queue_iter_done_task(test_queue, mocker, status):\n    mock_entry = mocker.Mock(stash_rev=_foo.name)\n\n    result = mocker.Mock(status=status)\n\n    mocker.patch.object(\n        test_queue,\n        \"_iter_done_tasks\",\n        return_value=[(result, mock_entry)],\n    )\n\n    if status == \"FAILURE\":\n        assert list(test_queue.iter_failed()) == [QueueDoneResult(mock_entry, None)]\n\n    elif status == \"SUCCESS\":\n        with pytest.raises(DvcException, match=\"Invalid experiment\"):\n            assert list(test_queue.iter_success())\n\n\ndef test_queue_status(test_queue, scm, mocker):\n    from datetime import datetime\n\n    active_entry = mocker.Mock(stash_rev=\"active\")\n    active_entry.name = \"foo\"\n    queued_entry = mocker.Mock(stash_rev=\"queued\")\n    queued_entry.name = None\n    failed_entry = mocker.Mock(stash_rev=\"failed\")\n    failed_entry.name = \"bar\"\n    success_entry = 
mocker.Mock(stash_rev=\"success\")\n    success_entry.name = None\n    success_result = mocker.Mock(ref_info=mocker.Mock())\n    success_result.ref_info.name = \"foobar\"\n\n    def resolve_commit(rev):\n        if rev == \"active\":\n            commit_time = datetime(2022, 8, 7).timestamp()  # noqa: DTZ001\n        elif rev == \"queued\":\n            commit_time = datetime(2022, 8, 6).timestamp()  # noqa: DTZ001\n        elif rev == \"failed\":\n            commit_time = datetime(2022, 8, 5).timestamp()  # noqa: DTZ001\n        elif rev == \"success\":\n            commit_time = datetime(2022, 8, 4).timestamp()  # noqa: DTZ001\n        return mocker.Mock(commit_time=commit_time)\n\n    mocker.patch.object(\n        scm,\n        \"resolve_commit\",\n        side_effect=mocker.MagicMock(side_effect=resolve_commit),\n    )\n\n    mocker.patch.object(\n        test_queue,\n        \"iter_active\",\n        return_value=[active_entry],\n    )\n    mocker.patch.object(\n        test_queue,\n        \"iter_queued\",\n        return_value=[queued_entry],\n    )\n    mocker.patch.object(\n        test_queue,\n        \"iter_failed\",\n        return_value=[(failed_entry, None)],\n    )\n    mocker.patch.object(\n        test_queue,\n        \"iter_success\",\n        return_value=[(success_entry, success_result)],\n    )\n\n    assert test_queue.status() == [\n        {\n            \"name\": \"foo\",\n            \"rev\": \"active\",\n            \"status\": \"Running\",\n            \"timestamp\": datetime(2022, 8, 7, 0, 0, 0),  # noqa: DTZ001\n        },\n        {\n            \"name\": None,\n            \"rev\": \"queued\",\n            \"status\": \"Queued\",\n            \"timestamp\": datetime(2022, 8, 6, 0, 0, 0),  # noqa: DTZ001\n        },\n        {\n            \"name\": \"bar\",\n            \"rev\": \"failed\",\n            \"status\": \"Failed\",\n            \"timestamp\": datetime(2022, 8, 5, 0, 0, 0),  # noqa: DTZ001\n        },\n        {\n          
  \"name\": \"foobar\",\n            \"rev\": \"success\",\n            \"status\": \"Success\",\n            \"timestamp\": datetime(2022, 8, 4, 0, 0, 0),  # noqa: DTZ001\n        },\n    ]\n"
  },
  {
    "path": "tests/unit/repo/experiments/queue/test_remove.py",
    "content": "from dvc.repo.experiments.queue.base import QueueDoneResult\n\n\ndef test_remove_queued(test_queue, mocker):\n    queued_test = [\"queue1\", \"queue2\", \"queue3\"]\n\n    stash_dict = {}\n    for name in queued_test:\n        stash_dict[name] = mocker.Mock()\n\n    msg_dict = {}\n    entry_dict = {}\n    for name in queued_test:\n        msg_dict[name] = mocker.Mock(delivery_tag=f\"msg_{name}\")\n        entry_dict[name] = mocker.Mock(stash_rev=name)\n        entry_dict[name].name = name\n\n    msg_iter = [(msg_dict[name], entry_dict[name]) for name in queued_test]\n    entry_iter = [entry_dict[name] for name in queued_test]\n\n    stash = mocker.patch.object(test_queue, \"stash\", return_value=mocker.Mock())\n    stash.stash_revs = stash_dict\n    mocker.patch.object(test_queue, \"_iter_queued\", return_value=msg_iter)\n    mocker.patch.object(test_queue, \"iter_queued\", return_value=entry_iter)\n\n    remove_revs_mocker = mocker.patch.object(test_queue.stash, \"remove_revs\")\n    reject_mocker = mocker.patch.object(test_queue.celery, \"reject\")\n\n    assert test_queue.remove([\"queue2\"]) == [\"queue2\"]\n    reject_mocker.assert_called_once_with(\"msg_queue2\")\n    remove_revs_mocker.assert_called_once_with([stash_dict[\"queue2\"]])\n    remove_revs_mocker.reset_mock()\n    reject_mocker.reset_mock()\n\n    assert test_queue.clear(queued=True) == queued_test\n    remove_revs_mocker.assert_called_once_with(list(stash_dict.values()))\n    reject_mocker.assert_has_calls(\n        [\n            mocker.call(\"msg_queue1\"),\n            mocker.call(\"msg_queue2\"),\n            mocker.call(\"msg_queue3\"),\n        ]\n    )\n\n\ndef test_remove_done(test_queue, mocker):\n    from funcy import concat\n\n    failed_test = [\"failed1\", \"failed2\", \"failed3\"]\n    success_test = [\"success1\", \"success2\", \"success3\"]\n\n    stash_dict = {}\n    for name in failed_test:\n        stash_dict[name] = mocker.Mock()\n\n    msg_dict = {}\n    
entry_dict = {}\n    for name in concat(failed_test, success_test):\n        msg_dict[name] = mocker.Mock(delivery_tag=f\"msg_{name}\", headers={\"id\": 0})\n        entry_dict[name] = mocker.Mock(stash_rev=name)\n        entry_dict[name].name = name\n\n    msg_iter = [\n        (msg_dict[name], entry_dict[name]) for name in concat(failed_test, success_test)\n    ]\n    done_iter = [\n        QueueDoneResult(entry_dict[name], None)\n        for name in concat(failed_test, success_test)\n    ]\n    failed_iter = [QueueDoneResult(entry_dict[name], None) for name in failed_test]\n    success_iter = [QueueDoneResult(entry_dict[name], None) for name in success_test]\n\n    stash = mocker.patch.object(test_queue, \"failed_stash\", return_value=mocker.Mock())\n    stash.stash_revs = stash_dict\n    mocker.patch.object(test_queue, \"_iter_processed\", return_value=msg_iter)\n    mocker.patch.object(test_queue, \"iter_done\", return_value=done_iter)\n    mocker.patch.object(test_queue, \"iter_success\", return_value=success_iter)\n    mocker.patch.object(test_queue, \"iter_failed\", return_value=failed_iter)\n    mocker.patch(\"celery.result.AsyncResult\", return_value=mocker.Mock())\n\n    remove_revs_mocker = mocker.patch.object(test_queue.failed_stash, \"remove_revs\")\n    purge_mocker = mocker.patch.object(test_queue.celery, \"purge\")\n\n    assert test_queue.remove([\"failed3\", \"success2\"]) == [\"failed3\", \"success2\"]\n    remove_revs_mocker.assert_called_once_with([stash_dict[\"failed3\"]])\n    purge_mocker.assert_has_calls(\n        [mocker.call(\"msg_failed3\"), mocker.call(\"msg_success2\")]\n    )\n\n    remove_revs_mocker.reset_mock()\n    purge_mocker.reset_mock()\n\n    assert set(test_queue.clear(success=True, failed=True)) == set(failed_test) | set(\n        success_test\n    )\n    purge_mocker.assert_has_calls(\n        [\n            mocker.call(\"msg_failed1\"),\n            mocker.call(\"msg_failed2\"),\n            
mocker.call(\"msg_failed3\"),\n            mocker.call(\"msg_success1\"),\n            mocker.call(\"msg_success2\"),\n            mocker.call(\"msg_success3\"),\n        ],\n        any_order=True,\n    )\n    remove_revs_mocker.assert_called_once_with(list(stash_dict.values()))\n"
  },
  {
    "path": "tests/unit/repo/experiments/test_collect.py",
    "content": "import datetime\nimport random\n\nimport pytest\n\nfrom dvc.repo.experiments.collect import ExpRange, ExpState, SerializableExp, collect\n\n\n@pytest.mark.vscode\ndef test_collect_stable_sorting(dvc, scm, mocker):\n    \"\"\"\n    Check that output is deterministically sorted even for\n    commits with the same timestamp. This affects the experience\n    in vs-code to avoid experiments \"bouncing around\" when \"exp show\"\n    is called repeatedly\n    \"\"\"\n    expected_revs = [\n        \"c\" * 40,\n        \"b\" * 40,\n        \"a\" * 40,\n        \"7\" * 40,\n    ]\n\n    def collect_queued_patched(_, baseline_revs) -> dict[str, list[\"ExpRange\"]]:\n        single_timestamp = datetime.datetime(2023, 6, 20, 0, 0, 0)  # noqa: DTZ001\n\n        exp_ranges = [\n            ExpRange(\n                revs=[\n                    ExpState(\n                        rev=rev,\n                        name=f\"exp-state-{rev[0]}\",\n                        data=SerializableExp(rev=rev, timestamp=single_timestamp),\n                    )\n                ],\n                name=f\"exp-range-{rev[0]}\",\n            )\n            for rev in expected_revs\n        ]\n\n        # shuffle collection order\n        random.shuffle(exp_ranges)\n\n        return dict.fromkeys(baseline_revs, exp_ranges)\n\n    mocker.patch(\"dvc.repo.experiments.collect.collect_queued\", collect_queued_patched)\n    mocker.patch(\"dvc.repo.experiments.collect.collect_active\", return_value={})\n    mocker.patch(\"dvc.repo.experiments.collect.collect_failed\", return_value={})\n    mocker.patch(\"dvc.repo.experiments.collect.collect_successful\", return_value={})\n\n    # repeat (shuffling collection order in collect_queued_patched)\n    for _ in range(20):\n        collected = collect(repo=dvc, all_commits=True)\n        assert collected[0].rev == \"workspace\"\n        assert collected[0].experiments is None\n        assert collected[1].rev == scm.get_rev()\n        
_assert_experiment_rev_order(collected[1].experiments, expected_revs)\n\n\ndef _assert_experiment_rev_order(actual: list[\"ExpRange\"], expected_revs: list[str]):\n    expected_revs = expected_revs.copy()\n\n    for actual_exp_range in actual:\n        for exp_state in actual_exp_range.revs:\n            assert exp_state.rev == expected_revs.pop(0)\n"
  },
  {
    "path": "tests/unit/repo/experiments/test_executor_status.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.exceptions import ReproductionError\nfrom dvc.repo.experiments.executor.base import ExecutorInfo, TaskStatus\nfrom dvc.repo.experiments.queue.tasks import cleanup_exp, collect_exp, setup_exp\n\n\ndef test_celery_queue_success_status(dvc, scm, test_queue, exp_stage):\n    queue_entry = test_queue._stash_exp(\n        params={\"params.yaml\": [\"foo=1\"]},\n        targets=exp_stage.addressing,\n        name=\"success\",\n    )\n    infofile = test_queue.get_infofile_path(queue_entry.stash_rev)\n    executor = setup_exp.s(queue_entry.asdict())()\n    executor_info = ExecutorInfo.load_json(infofile)\n    assert executor_info.status == TaskStatus.PREPARING\n\n    cmd = [\"dvc\", \"exp\", \"exec-run\", \"--infofile\", infofile]\n    proc_dict = test_queue.proc.run_signature(cmd, name=queue_entry.stash_rev)()\n\n    executor_info = ExecutorInfo.load_json(infofile)\n    assert executor_info.status == TaskStatus.SUCCESS\n\n    collect_exp.s(proc_dict, queue_entry.asdict())()\n    cleanup_exp.s(executor, infofile)()\n    executor_info = ExecutorInfo.load_json(infofile)\n    assert executor_info.status == TaskStatus.FINISHED\n\n\ndef test_celery_queue_failure_status(dvc, scm, test_queue, failed_exp_stage):\n    queue_entry = test_queue._stash_exp(\n        params={\"params.yaml\": [\"foo=1\"]},\n        targets=failed_exp_stage.addressing,\n        name=\"failed\",\n    )\n    infofile = test_queue.get_infofile_path(queue_entry.stash_rev)\n    setup_exp.s(queue_entry.asdict())()\n    cmd = [\"dvc\", \"exp\", \"exec-run\", \"--infofile\", infofile]\n    test_queue.proc.run_signature(cmd, name=queue_entry.stash_rev)()\n    executor_info = ExecutorInfo.load_json(infofile)\n    assert executor_info.status == TaskStatus.FAILED\n\n\n@pytest.mark.parametrize(\"queue_type\", [\"workspace_queue\", \"tempdir_queue\"])\ndef test_workspace_executor_success_status(dvc, scm, exp_stage, queue_type):\n    workspace_queue = 
getattr(dvc.experiments, queue_type)\n    queue_entry = workspace_queue.put(\n        params={\"params.yaml\": [\"foo=1\"]}, targets=exp_stage.addressing, name=\"success\"\n    )\n    name = workspace_queue._EXEC_NAME or queue_entry.stash_rev\n    infofile = workspace_queue.get_infofile_path(name)\n    entry, executor = workspace_queue.get()\n    rev = entry.stash_rev\n    exec_result = executor.reproduce(info=executor.info, rev=rev, infofile=infofile)\n    executor_info = ExecutorInfo.load_json(infofile)\n    assert executor_info.status == TaskStatus.SUCCESS\n    if exec_result.ref_info:\n        workspace_queue.collect_executor(dvc.experiments, executor, exec_result)\n    executor.cleanup(infofile)\n\n    if queue_type == \"tempdir_queue\":\n        executor_info = ExecutorInfo.load_json(infofile)\n        assert executor_info.status == TaskStatus.FINISHED\n    else:\n        assert not os.path.exists(infofile)\n\n\n@pytest.mark.parametrize(\"queue_type\", [\"workspace_queue\", \"tempdir_queue\"])\ndef test_workspace_executor_failed_status(dvc, scm, failed_exp_stage, queue_type):\n    queue = getattr(dvc.experiments, queue_type)\n    queue.put(\n        params={\"params.yaml\": [\"foo=1\"]},\n        targets=failed_exp_stage.addressing,\n        name=\"failed\",\n    )\n    entry, executor = queue.get()\n    name = queue._EXEC_NAME or entry.stash_rev\n    infofile = queue.get_infofile_path(name)\n    rev = entry.stash_rev\n\n    with pytest.raises(ReproductionError):\n        executor.reproduce(info=executor.info, rev=rev, infofile=infofile)\n    executor_info = ExecutorInfo.load_json(infofile)\n    assert executor_info.status == TaskStatus.FAILED\n\n    cleanup_exp.s(executor, infofile)()\n    if queue_type == \"workspace_queue\":\n        assert not os.path.exists(infofile)\n    else:\n        executor_info = ExecutorInfo.load_json(infofile)\n        assert executor_info.status == TaskStatus.FAILED\n\n\ndef test_executor_status_compatibility():\n    data = {\n  
      \"git_url\": \"file:///Users/home\",\n        \"baseline_rev\": \"123\",\n        \"location\": \"dvc-task\",\n        \"root_dir\": \"/Users/home/8088/.dvc/tmp/exps/tmpx85892cx\",\n        \"dvc_dir\": \".dvc\",\n        \"collected\": True,\n    }\n    result = ExecutorInfo.from_dict(data)\n    assert result.status == TaskStatus.FINISHED\n"
  },
  {
    "path": "tests/unit/repo/experiments/test_remove.py",
    "content": "from dvc.repo.experiments.queue.base import QueueDoneResult\n\n\ndef test_remove_done_tasks(dvc, test_queue, scm, mocker):\n    from funcy import concat\n\n    failed_test = [\"failed1\", \"failed2\"]\n    success_test = [\"success1\", \"success2\"]\n\n    # create mock ref info\n    ref_info_dict = {}\n    for name in success_test:\n        ref_info_dict[name] = mocker.Mock()\n        ref_info_dict[name].name = name\n    for name in failed_test:\n        ref_info_dict[name] = None\n\n    # create mock queue entry\n    entry_dict = {}\n    for name in concat(failed_test, success_test):\n        entry_dict[name] = mocker.Mock(stash_rev=name)\n        entry_dict[name].name = name\n\n    done_iter = [\n        QueueDoneResult(entry_dict[name], None)\n        for name in concat(failed_test, success_test)\n    ]\n\n    mocker.patch.object(test_queue, \"iter_done\", return_value=done_iter)\n\n    mocker.patch(\n        \"dvc.repo.experiments.utils.resolve_name\",\n        autospec=True,\n        return_value=ref_info_dict,\n    )\n\n    remove_exp_refs = mocker.patch(\n        \"dvc.repo.experiments.utils.remove_exp_refs\",\n    )\n    remove_tasks_mocker = mocker.patch(\n        \"dvc.repo.experiments.queue.remove.remove_tasks\",\n    )\n\n    assert (\n        dvc.experiments.remove(failed_test + success_test) == failed_test + success_test\n    )\n\n    remove_tasks_mocker.assert_called_once_with(\n        test_queue,\n        [entry_dict[name] for name in failed_test + success_test],\n    )\n\n    remove_exp_refs.assert_called_once_with(\n        dvc.scm, [ref_info_dict[name] for name in success_test]\n    )\n"
  },
  {
    "path": "tests/unit/repo/experiments/test_utils.py",
    "content": "import re\n\nimport pytest\n\nfrom dvc.exceptions import InvalidArgumentError\nfrom dvc.repo.experiments.refs import EXPS_NAMESPACE, ExpRefInfo\nfrom dvc.repo.experiments.utils import (\n    check_ref_format,\n    gen_random_name,\n    resolve_name,\n    to_studio_params,\n)\n\n\ndef commit_exp_ref(tmp_dir, scm, file=\"foo\", contents=\"foo\", name=\"foo\"):\n    tmp_dir.scm_gen(file, contents, commit=\"init\")\n    rev = scm.get_rev()\n    ref = f\"{EXPS_NAMESPACE}/ab/c123/{name}\"\n    scm.dulwich.repo.refs[ref.encode(\"utf-8\")] = rev.encode(\"utf-8\")\n    return ref, rev\n\n\n@pytest.mark.parametrize(\"use_url\", [True, False])\n@pytest.mark.parametrize(\"name_only\", [True, False])\ndef test_resolve_exp_ref(tmp_dir, scm, git_upstream, name_only, use_url):\n    ref, _ = commit_exp_ref(tmp_dir, scm)\n    name = \"foo\" if name_only else ref\n    result = resolve_name(scm, [name, \"notexist\"])\n    assert isinstance(result[name], ExpRefInfo)\n    assert str(result[name]) == ref\n    assert result[\"notexist\"] is None\n\n    scm.push_refspecs(git_upstream.url, f\"{ref}:{ref}\")\n    remote = git_upstream.url if use_url else git_upstream.remote\n    name = \"foo\" if name_only else ref\n    remote_ref_info = resolve_name(scm, [name], remote)[name]\n    assert isinstance(remote_ref_info, ExpRefInfo)\n    assert str(remote_ref_info) == ref\n\n\n@pytest.mark.parametrize(\n    \"name,result\",\n    [\n        (\"name\", True),\n        (\"group/name\", False),\n        (\"na me\", False),\n        (\"invalid/.name\", False),\n        (\"@\", pytest.param(False, marks=pytest.mark.xfail)),\n        (\":\", False),\n        (\"^\", False),\n        (\"*\", False),\n        (\"~\", False),\n        (\"?\", False),\n    ],\n)\ndef test_run_check_ref_format(scm, name, result):\n    ref = ExpRefInfo(\"abc123\", name)\n    if result:\n        check_ref_format(scm, ref)\n    else:\n        with pytest.raises(InvalidArgumentError):\n            
check_ref_format(scm, ref)\n\n\n@pytest.mark.parametrize(\n    \"params,expected\",\n    [\n        (\n            {\"workspace\": {\"data\": {\"params.yaml\": {\"data\": {\"foo\": 1}}}}},\n            {\"params.yaml\": {\"foo\": 1}},\n        ),\n        (\n            {\"workspace\": {\"data\": {\"params.yaml\": {\"error\": \"FileNotFound\"}}}},\n            {\"params.yaml\": {}},\n        ),\n        (\n            {\"workspace\": {\"error\": \"something went wrong\"}},\n            {},\n        ),\n    ],\n)\ndef test_to_studio_params(params, expected):\n    assert to_studio_params(params) == expected\n\n\ndef test_gen_random_name():\n    assert re.match(\"[a-zA-Z]+-[a-zA-Z]+\", gen_random_name())\n"
  },
  {
    "path": "tests/unit/repo/plots/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/repo/plots/test_diff.py",
    "content": "import pytest\n\nfrom dvc.repo.plots.diff import _revisions\n\n\n@pytest.mark.parametrize(\n    \"arg_revisions,is_dirty,expected_revisions\",\n    [\n        ([], False, [\"workspace\"]),\n        ([], True, [\"HEAD\", \"workspace\"]),\n        ([\"v1\", \"v2\", \"workspace\"], False, [\"v1\", \"v2\", \"workspace\"]),\n        ([\"v1\", \"v2\", \"workspace\"], True, [\"v1\", \"v2\", \"workspace\"]),\n    ],\n)\ndef test_revisions(mocker, arg_revisions, is_dirty, expected_revisions):\n    mock_scm = mocker.Mock()\n    mock_scm.configure_mock(\n        **{\"is_dirty.return_value\": is_dirty, \"get_ref.return_value\": None}\n    )\n    mock_repo = mocker.Mock(scm=mock_scm)\n    assert _revisions(mock_repo, arg_revisions, False) == expected_revisions\n\n\n@pytest.mark.parametrize(\n    \"arg_revisions,baseline,expected_revisions\",\n    [\n        ([\"v1\"], \"v0\", [\"v1\", \"v0\"]),\n        ([\"v1\"], None, [\"v1\", \"workspace\"]),\n        ([\"v1\", \"v2\"], \"v0\", [\"v1\", \"v2\"]),\n        ([\"v1\", \"v2\"], None, [\"v1\", \"v2\"]),\n    ],\n)\ndef test_revisions_experiment(mocker, arg_revisions, baseline, expected_revisions):\n    mock_scm = mocker.Mock()\n    mock_scm.configure_mock(\n        **{\"is_dirty.return_value\": False, \"get_ref.return_value\": None}\n    )\n    mock_experiments = mocker.Mock()\n    mock_experiments.configure_mock(**{\"get_baseline.return_value\": baseline})\n    mock_repo = mocker.Mock(scm=mock_scm, experiments=mock_experiments)\n    assert _revisions(mock_repo, arg_revisions, True) == expected_revisions\n"
  },
  {
    "path": "tests/unit/repo/test_graph.py",
    "content": "import pytest\nfrom networkx import DiGraph\nfrom networkx.utils import graphs_equal\n\nfrom dvc.repo.graph import get_subgraph_of_nodes\n\n\n@pytest.mark.parametrize(\n    \"nodes,downstream,expected_edges\",\n    [\n        ([], False, {1: [2, 3], 2: [4, 5], 3: [6, 7], 8: [9]}),\n        ([1], False, {1: [2, 3], 2: [4, 5], 3: [6, 7]}),\n        ([2], False, {2: [4, 5]}),\n        ([3], False, {3: [6, 7]}),\n        ([8], False, [(8, 9)]),\n        ([2, 3, 8], False, {2: [4, 5], 3: [6, 7], 8: [9]}),\n        ([4], False, {4: []}),\n        ([], True, {1: [2, 3], 2: [4, 5], 3: [6, 7], 8: [9]}),\n        ([1], True, {1: []}),\n        ([9], True, [(8, 9)]),\n        ([2], True, [(1, 2)]),\n        ([6], True, [(1, 3), (3, 6)]),\n        ([2, 3, 8], True, {1: [2, 3], 8: []}),\n        ([4, 7], True, {1: [2, 3], 2: [4], 3: [7]}),\n    ],\n)\ndef test_subgraph_of_nodes(nodes, downstream, expected_edges):\n    r\"\"\"\n             1\n           /   \\\n          2     3      8\n         / \\   / \\     |\n        4   5 6   7    9\n    \"\"\"\n    graph = DiGraph({1: [2, 3], 2: [4, 5], 3: [6, 7], 8: [9]})\n    subgraph = get_subgraph_of_nodes(graph, nodes, downstream=downstream)\n    expected = DiGraph(expected_edges)\n    assert graphs_equal(expected, subgraph)\n"
  },
  {
    "path": "tests/unit/repo/test_open_repo.py",
    "content": "import os\n\nimport pytest\n\nfrom dvc.repo.open_repo import _external_repo as external_repo\nfrom dvc.testing.tmp_dir import make_subrepo\n\n\ndef test_hook_is_called(tmp_dir, erepo_dir, mocker):\n    subrepo_paths = [\n        \"subrepo1\",\n        \"subrepo2\",\n        os.path.join(\"dir\", \"subrepo3\"),\n        os.path.join(\"dir\", \"subrepo4\"),\n        \"subrepo5\",\n        os.path.join(\"subrepo5\", \"subrepo6\"),\n    ]\n    subrepos = [erepo_dir / path for path in subrepo_paths]\n    for repo in subrepos:\n        make_subrepo(repo, erepo_dir.scm)\n\n    for repo in [*subrepos, erepo_dir]:\n        with repo.chdir():\n            repo.scm_gen(\"foo\", \"foo\", commit=f\"git add {repo}/foo\")\n            repo.dvc_gen(\"bar\", \"bar\", commit=f\"dvc add {repo}/bar\")\n\n    with external_repo(str(erepo_dir), subrepos=True, uninitialized=True) as repo:\n        spy = mocker.spy(repo.dvcfs.fs, \"repo_factory\")\n\n        list(repo.dvcfs.walk(\"\", ignore_subrepos=False))  # drain\n        assert spy.call_count == len(subrepos)\n\n        paths = [\"/\" + path.replace(\"\\\\\", \"/\") for path in subrepo_paths]\n        spy.assert_has_calls(\n            [\n                mocker.call(\n                    path,\n                    fs=repo.fs,\n                    scm=repo.scm,\n                    repo_factory=repo.dvcfs.fs.repo_factory,\n                )\n                for path in paths\n            ],\n            any_order=True,\n        )\n\n\n@pytest.mark.parametrize(\"root_is_dvc\", [False, True])\ndef test_subrepo_is_constructed_properly(\n    tmp_dir, scm, mocker, make_tmp_dir, root_is_dvc\n):\n    if root_is_dvc:\n        make_subrepo(tmp_dir, scm)\n\n    subrepo = tmp_dir / \"subrepo\"\n    make_subrepo(subrepo, scm)\n    local_cache = subrepo.dvc.cache.local_cache_dir\n\n    tmp_dir.scm_gen(\"bar\", \"bar\", commit=\"add bar\")\n    subrepo.dvc_gen(\"foo\", \"foo\", commit=\"add foo\")\n\n    cache_dir = 
make_tmp_dir(\"temp-cache\")\n    with external_repo(\n        str(tmp_dir),\n        subrepos=True,\n        uninitialized=True,\n        config={\"cache\": {\"dir\": str(cache_dir), \"type\": [\"symlink\"]}},\n    ) as repo:\n        spy = mocker.spy(repo.dvcfs.fs, \"repo_factory\")\n\n        list(repo.dvcfs.walk(\"\", ignore_subrepos=False))  # drain\n        assert spy.call_count == 1\n        subrepo = spy.spy_return\n\n        assert repo.url == str(tmp_dir)\n        assert repo.config[\"cache\"][\"dir\"] == str(cache_dir)\n        assert repo.cache.local.path == os.path.join(cache_dir, \"files\", \"md5\")\n        assert subrepo.cache.local.path == os.path.join(cache_dir, \"files\", \"md5\")\n\n        assert repo.config[\"cache\"][\"type\"] == [\"symlink\"]\n        assert repo.cache.local.cache_types == [\"symlink\"]\n        assert subrepo.cache.local.cache_types == [\"symlink\"]\n\n        assert subrepo.config[\"remote\"][\"auto-generated-upstream\"][\"url\"] == local_cache\n        if root_is_dvc:\n            main_cache = tmp_dir.dvc.cache.local_cache_dir\n            assert repo.config[\"remote\"][\"auto-generated-upstream\"][\"url\"] == main_cache\n"
  },
  {
    "path": "tests/unit/repo/test_repo.py",
    "content": "import os\nimport shutil\n\nimport pytest\n\nfrom dvc.exceptions import OutputDuplicationError\nfrom dvc.repo import NotDvcRepoError, Repo, locked\nfrom dvc_data.hashfile.hash_info import HashInfo\n\n\ndef test_is_dvc_internal(dvc):\n    assert dvc.is_dvc_internal(os.path.join(\"path\", \"to\", \".dvc\", \"file\"))\n    assert not dvc.is_dvc_internal(os.path.join(\"path\", \"to-non-.dvc\", \"file\"))\n\n\n@pytest.mark.parametrize(\n    \"path\",\n    [\n        os.path.join(\"dir\", \"subdir\", \"file\"),\n        os.path.join(\"dir\", \"subdir\"),\n        \"dir\",\n    ],\n)\ndef test_find_outs_by_path(tmp_dir, dvc, path):\n    (stage,) = tmp_dir.dvc_gen({\"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"}})\n\n    outs = dvc.find_outs_by_path(path, strict=False)\n    assert len(outs) == 1\n    assert outs[0].fs_path == stage.outs[0].fs_path\n\n\ndef test_find_outs_by_path_does_graph_checks(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo\")\n    shutil.copyfile(\"foo.dvc\", \"foo-2.dvc\")\n\n    dvc._reset()\n    with pytest.raises(OutputDuplicationError):\n        dvc.find_outs_by_path(\"foo\")\n\n\n@pytest.mark.parametrize(\n    \"path\",\n    [os.path.join(\"dir\", \"subdir\", \"file\"), os.path.join(\"dir\", \"subdir\")],\n)\ndef test_used_objs(tmp_dir, dvc, path):\n    tmp_dir.dvc_gen({\"dir\": {\"subdir\": {\"file\": \"file\"}, \"other\": \"other\"}})\n\n    expected = {\n        HashInfo(\"md5\", \"70922d6bf66eb073053a82f77d58c536.dir\"),\n        HashInfo(\"md5\", \"8c7dd922ad47494fc02c388e12c00eac\"),\n    }\n\n    used = set()\n    for _, obj_ids in dvc.used_objs([path]).items():\n        used.update(obj_ids)\n\n    assert used == expected\n\n\ndef test_locked(mocker):\n    repo = mocker.MagicMock()\n    repo._lock_depth = 0\n    repo.method = locked(repo.method)\n\n    args = ()\n    kwargs = {}\n    repo.method(repo, args, kwargs)\n\n    assert repo.method_calls == [\n        mocker.call._reset(),\n        
mocker.call.method(repo, args, kwargs),\n        mocker.call._reset(),\n    ]\n\n\ndef test_skip_graph_checks(tmp_dir, dvc, mocker, run_copy):\n    # See https://github.com/treeverse/dvc/issues/2671 for more info\n    from dvc.repo.index import Index\n\n    mock_build_graph = mocker.spy(Index.graph, \"fget\")\n\n    # sanity check\n    tmp_dir.gen(\"foo\", \"foo text\")\n    dvc.add(\"foo\")\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n    assert mock_build_graph.called\n\n    # check that our hack can be enabled\n    mock_build_graph.reset_mock()\n    dvc._skip_graph_checks = True\n    tmp_dir.gen(\"baz\", \"baz text\")\n    run_copy(\"baz\", \"qux\", name=\"copy-baz-qux\")\n    assert not mock_build_graph.called\n\n    # check that our hack can be disabled\n    mock_build_graph.reset_mock()\n    dvc._skip_graph_checks = False\n    tmp_dir.gen(\"quux\", \"quux text\")\n    run_copy(\"quux\", \"quuz\", name=\"copy-quux-quuz\")\n    assert mock_build_graph.called\n\n\ndef test_branch_config(tmp_dir, scm):\n    tmp_dir.scm_gen(\"foo\", \"foo\", commit=\"init\")\n\n    # sanity check\n    with pytest.raises(NotDvcRepoError):\n        Repo().close()\n\n    scm.checkout(\"branch\", create_new=True)\n    dvc = Repo.init()\n    with dvc.config.edit() as conf:\n        conf[\"remote\"][\"branch\"] = {\"url\": \"/some/path\"}\n    dvc.close()\n\n    scm.add([os.path.join(\".dvc\", \"config\")])\n    scm.commit(\"init dvc\")\n    scm.checkout(\"master\")\n\n    with pytest.raises(NotDvcRepoError):\n        Repo(rev=\"master\").close()\n\n    dvc = Repo(rev=\"branch\")\n    try:\n        assert dvc.config[\"remote\"][\"branch\"][\"url\"] == \"/some/path\"\n    finally:\n        dvc.close()\n\n\ndef test_dynamic_cache_initialization(tmp_dir, scm):\n    dvc = Repo.init()\n    with dvc.config.edit() as conf:\n        conf[\"cache\"][\"ssh\"] = \"foo\"\n        conf[\"remote\"][\"foo\"] = {\"url\": \"remote://bar/baz\"}\n    dvc.close()\n\n    
Repo(str(tmp_dir)).close()\n"
  },
  {
    "path": "tests/unit/repo/test_reproduce.py",
    "content": "from itertools import chain\n\nfrom networkx import DiGraph\nfrom networkx.utils import graphs_equal\n\nfrom dvc.repo.reproduce import get_active_graph, plan_repro\nfrom dvc.testing import matchers as M\n\n\ndef test_active_graph(mocker):\n    n = mocker.sentinel\n    n1, n2, n3, n4, n5, n6, n7, n8, n9 = (getattr(n, f\"n{i}\") for i in range(1, 10))\n    edges = {n1: [n2, n3], n2: [n4, n5], n3: [n6, n7], n8: [n9]}\n    for node in chain.from_iterable([n, *v] for n, v in edges.items()):\n        node.frozen = False\n\n    g = DiGraph(edges)\n\n    active = get_active_graph(g)\n    assert graphs_equal(g, active)\n\n    n2.frozen = True\n    active = get_active_graph(g)\n    assert g.edges() - active.edges() == {(n2, n5), (n2, n4)}\n    assert n2 in active\n    assert not active.edges() - g.edges()\n    assert not graphs_equal(g, active)\n\n\ndef test_repro_plan():\n    r\"\"\"\n             1\n           /  \\\n          2    3    8\n         / \\  / \\   |\n        4  5 6  7   9\n    \"\"\"\n    g = DiGraph({1: [2, 3], 2: [4, 5], 3: [6, 7], 8: [9]})\n\n    assert plan_repro(g) == [4, 5, 2, 6, 7, 3, 1, 9, 8]\n    assert plan_repro(g, [1]) == [4, 5, 2, 6, 7, 3, 1]\n    assert plan_repro(g, [4], downstream=True) == [4, 2, 1]\n    assert plan_repro(g, [8], True) == plan_repro(g, [9], True) == [9, 8]\n    assert plan_repro(g, [2, 8], True) == [4, 5, 2, 6, 7, 3, 1, 9, 8]\n    assert plan_repro(g, [2, 3], downstream=True) == [M.any_of(2, 3), M.any_of(2, 3), 1]\n\n\ndef test_number_reproduces(tmp_dir, dvc, mocker):\n    mock = mocker.Mock(return_value=None)\n    tmp_dir.dvc_gen({\"pre-foo\": \"pre-foo\"})\n\n    dvc.stage.add(name=\"echo-foo\", outs=[\"foo\"], cmd=\"echo foo > foo\", verify=False)\n    dvc.stage.add(\n        name=\"echo-bar\", deps=[\"foo\"], outs=[\"bar\"], cmd=\"echo bar > bar\", verify=False\n    )\n    dvc.stage.add(\n        name=\"echo-baz\", deps=[\"foo\"], outs=[\"baz\"], cmd=\"echo baz > baz\", verify=False\n    )\n    
dvc.stage.add(\n        name=\"echo-boop\",\n        deps=[\"bar\"],\n        outs=[\"boop\"],\n        cmd=\"echo boop > boop\",\n        verify=False,\n    )\n\n    dvc.reproduce(all_pipelines=True, repro_fn=mock)\n    assert mock.call_count == 5\n"
  },
  {
    "path": "tests/unit/repo/test_scm_context.py",
    "content": "import re\n\nimport pytest\n\nfrom dvc.repo.scm_context import SCMContext\nfrom dvc.scm import Git, NoSCM\n\n\ndef pytest_generate_tests(metafunc):\n    if \"scm_context\" in metafunc.fixturenames:\n        metafunc.parametrize(\"scm_context\", [\"scm\", \"no_scm\"], indirect=True)\n\n\n@pytest.fixture\ndef scm_context(request, mocker):\n    spec = Git if getattr(request, \"param\", \"scm\") == \"scm\" else NoSCM\n    # we'll test `ignore` and `ignore_remove` in a functional test.\n    return SCMContext(\n        scm=mocker.MagicMock(\n            spec=spec,\n            **{\n                \"ignore_remove.return_value\": \".gitignore\",\n                \"ignore.return_value\": \".gitignore\",\n            },\n        )\n    )\n\n\ndef test_scm_track_file(scm_context):\n    scm_context.track_file(\"foo\")\n    assert scm_context.files_to_track == {\"foo\"}\n    scm_context.track_file(\"bar\")\n    assert scm_context.files_to_track == {\"foo\", \"bar\"}\n\n\ndef test_scm_track_changed_files(scm_context):\n    scm_context.track_changed_files()\n    scm_context.scm.add.assert_not_called()\n\n    scm_context.track_file(\"foo\")\n    scm_context.track_changed_files()\n    scm_context.scm.add.assert_called_once_with([\"foo\"])\n\n\ndef test_ignore(scm_context):\n    scm_context.ignore(\"foo\")\n\n    scm_context.scm.ignore.assert_called_once_with(\"foo\")\n    assert scm_context.files_to_track == {\".gitignore\"}\n    assert scm_context.ignored_paths == [\"foo\"]\n\n\ndef test_ignore_remove(scm_context):\n    scm_context.ignore_remove(\"foo\")\n    scm_context.scm.ignore_remove.assert_called_once_with(\"foo\")\n    assert scm_context.files_to_track == {\".gitignore\"}\n\n\ndef test_scm_context_reset_on_exit(scm_context):\n    with scm_context:\n        scm_context.ignore(\"foo\")\n        scm_context.track_file(\"bar\")\n    assert not scm_context.files_to_track\n    assert not scm_context.ignored_paths\n\n\ndef 
test_scm_context_autostage_changed_files(scm_context):\n    scm_context.autostage = True\n\n    with scm_context:\n        scm_context.track_file(\"foo\")\n        assert scm_context.files_to_track == {\"foo\"}\n\n    assert not scm_context.files_to_track\n    assert not scm_context.ignored_paths\n    scm_context.scm.add.assert_called_once_with([\"foo\"])\n\n\ndef test_scm_context_clears_ignores_on_error(scm_context):\n    class CustomException(Exception):  # noqa: N818\n        pass\n\n    with pytest.raises(CustomException), scm_context():  # noqa: PT012\n        scm_context.ignore(\"foo\")\n        assert scm_context.ignored_paths == [\"foo\"]\n        raise CustomException\n\n    scm_context.scm.ignore_remove.assert_called_once_with(\"foo\")\n    assert scm_context.files_to_track == {\".gitignore\"}\n    assert not scm_context.ignored_paths\n\n\n@pytest.mark.parametrize(\"autostage\", [True, False])\n@pytest.mark.parametrize(\"quiet\", [True, False])\ndef test_scm_context_on_no_files_to_track(caplog, scm_context, autostage, quiet):\n    with scm_context(autostage=autostage, quiet=quiet):\n        pass\n\n    scm_context.scm.assert_not_called()\n    assert not caplog.text\n\n\n@pytest.mark.parametrize(\"quiet\", [True, False])\ndef test_scm_context_remind_to_track(caplog, scm_context, quiet):\n    with scm_context(quiet=quiet) as context:\n        context.track_file(\"foo\")\n        context.track_file(\"lorem ipsum\")\n        assert context.files_to_track == {\"foo\", \"lorem ipsum\"}\n\n    if quiet or isinstance(scm_context.scm, NoSCM):\n        assert not caplog.text\n    else:\n        assert \"To track the changes with git, run:\" in caplog.text\n        match = re.search(r\"git add(?: (('.*?')|(\\S+)))*\", caplog.text)\n        assert match\n        assert set(match.groups()) == {\"'lorem ipsum'\", \"foo\"}\n\n\ndef test_scm_context_remind_disable(caplog, scm_context):\n    with scm_context(quiet=True) as context:\n        context.track_file(\"foo\")\n   
     assert context.files_to_track == {\"foo\"}\n    assert not caplog.text\n\n    assert scm_context.quiet is False\n    scm_context.quiet = True\n    with scm_context() as context:\n        context.track_file(\"foo\")\n        assert context.files_to_track == {\"foo\"}\n    assert not caplog.text\n\n\ndef test_scm_context_decorator(scm_context, mocker):\n    from dvc.repo.scm_context import scm_context as decorator\n\n    repo = mocker.MagicMock(scm_context=scm_context)\n\n    def test_method(repo, *args, **kwargs):\n        scm_context.track_file(\"foo\")\n\n    method = mocker.MagicMock(wraps=test_method)\n    decorator(method, autostage=True)(repo, \"arg\", kw=1)\n    method.assert_called_once_with(repo, \"arg\", kw=1)\n    scm_context.scm.add.assert_called_once_with([\"foo\"])\n"
  },
  {
    "path": "tests/unit/scm/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/scm/test_scm.py",
    "content": "from datetime import datetime\n\nfrom scmrepo.exceptions import SCMError\n\nfrom dvc.repo.experiments import ExpRefInfo\nfrom dvc.scm import iter_revs\n\n\ndef test_iter_revs(tmp_dir, scm, mocker):\n    \"\"\"\n    new         other\n     │            │\n    old (tag) ────┘\n     │\n    root\n    \"\"\"\n    old = scm.active_branch()\n    tmp_dir.scm_gen(\"foo\", \"init\", commit=\"init\")\n    rev_root = scm.get_rev()\n    tmp_dir.scm_gen(\"foo\", \"old\", commit=\"old\")\n    rev_old = scm.get_rev()\n    scm.checkout(\"new\", create_new=True)\n    tmp_dir.scm_gen(\"foo\", \"new\", commit=\"new\")\n    rev_new = scm.get_rev()\n    scm.checkout(old)\n    scm.tag(\"tag\")\n    scm.checkout(\"other\", create_new=True)\n    tmp_dir.scm_gen(\"foo\", \"other\", commit=\"new\")\n    rev_other = scm.get_rev()\n\n    ref = ExpRefInfo(rev_root, \"exp1\")\n    scm.set_ref(str(ref), rev_new)\n    ref = ExpRefInfo(rev_root, \"exp2\")\n    scm.set_ref(str(ref), rev_old)\n\n    gen = iter_revs(scm, [rev_root, \"new\"], 1)\n    assert gen == {rev_root: [rev_root], rev_new: [\"new\"]}\n    gen = iter_revs(scm, [\"new\"], 2)\n    assert gen == {rev_new: [\"new\"], rev_old: [rev_old]}\n    gen = iter_revs(scm, [\"other\"], -1)\n    assert gen == {\n        rev_other: [\"other\"],\n        rev_old: [rev_old],\n        rev_root: [rev_root],\n    }\n    gen = iter_revs(scm, [\"tag\"])\n    assert gen == {rev_old: [\"tag\"]}\n    gen = iter_revs(scm, all_branches=True)\n    assert gen == {rev_old: [old], rev_new: [\"new\"], rev_other: [\"other\"]}\n    gen = iter_revs(scm, all_branches=True, num=2)\n    assert gen == {\n        rev_old: [old, rev_old],\n        rev_root: [rev_root],\n        rev_new: [\"new\"],\n        rev_other: [\"other\"],\n    }\n    gen = iter_revs(scm, all_tags=True)\n    assert gen == {rev_old: [\"tag\"]}\n    gen = iter_revs(scm, all_commits=True)\n    assert gen == {\n        rev_old: [rev_old],\n        rev_new: [rev_new],\n        rev_other: 
[rev_other],\n        rev_root: [rev_root],\n    }\n    gen = iter_revs(scm, all_experiments=True)\n    assert gen == {\n        rev_new: [rev_new],\n        rev_old: [rev_old],\n    }\n\n    def _resolve_commit(rev):\n        from scmrepo.git.objects import GitCommit\n\n        if rev == rev_root:\n            return GitCommit(\n                \"dummy\",\n                commit_time=datetime(2022, 6, 28).timestamp(),  # noqa: DTZ001\n                commit_time_offset=0,\n                message=\"dummy\",\n                parents=[\"dummy\"],\n                committer_name=\"dummy\",\n                committer_email=\"dummy\",\n                author_name=\"dummy\",\n                author_email=\"dummy\",\n                author_time=datetime(2022, 6, 28).timestamp(),  # noqa: DTZ001\n                author_time_offset=0,\n            )\n        if rev == rev_old:\n            raise SCMError\n        return GitCommit(\n            \"dummy\",\n            commit_time=datetime(2022, 6, 30).timestamp(),  # noqa: DTZ001\n            commit_time_offset=0,\n            message=\"dummy\",\n            parents=[\"dummy\"],\n            committer_name=\"dummy\",\n            committer_email=\"dummy\",\n            author_name=\"dummy\",\n            author_email=\"dummy\",\n            author_time=datetime(2022, 6, 28).timestamp(),  # noqa: DTZ001\n            author_time_offset=0,\n        )\n\n    mocker.patch(\n        \"scmrepo.git.Git.resolve_commit\",\n        mocker.MagicMock(side_effect=_resolve_commit),\n    )\n\n    gen = iter_revs(scm, commit_date=\"2022-06-29\")\n    assert gen == {\n        rev_new: [rev_new],\n        rev_old: [rev_old],\n        rev_other: [rev_other],\n    }\n"
  },
  {
    "path": "tests/unit/stage/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/stage/test_cache.py",
    "content": "import os\n\nimport pytest\n\nimport dvc.output as dvc_output\n\n\ndef test_stage_cache(tmp_dir, dvc, mocker):\n    tmp_dir.gen(\"dep\", \"dep\")\n    tmp_dir.gen(\"script.py\", 'open(\"out\", \"w+\").write(\"out\"); ')\n    stage = dvc.run(\n        cmd=\"python script.py\",\n        deps=[\"script.py\", \"dep\"],\n        outs=[\"out\"],\n        name=\"write-out\",\n    )\n\n    with dvc.lock:\n        stage.remove(remove_outs=True, force=True)\n\n    assert not (tmp_dir / \"out\").exists()\n    assert not (tmp_dir / \"dvc.yaml\").exists()\n    assert not (tmp_dir / \"dvc.lock\").exists()\n\n    cache_dir = os.path.join(\n        dvc.stage_cache.cache_dir,\n        \"c7\",\n        \"c7a85d71de1912c43d9c5b6218c71630d6277e8fc11e4ccf5e12b3c202234838\",\n    )\n    cache_file = os.path.join(\n        cache_dir, \"5e2824029f8da9ef6c57131a638ceb65f27c02f16c85d71e85671c27daed0501\"\n    )\n\n    assert os.path.isdir(cache_dir)\n    assert os.listdir(cache_dir) == [os.path.basename(cache_file)]\n    assert os.path.isfile(cache_file)\n\n    run_spy = mocker.patch(\"dvc.stage.run.cmd_run\")\n    checkout_spy = mocker.spy(dvc_output, \"checkout\")\n    with dvc.lock:\n        stage.run()\n\n    assert not run_spy.called\n    assert checkout_spy.call_count == 2\n\n    assert (tmp_dir / \"out\").exists()\n    assert (tmp_dir / \"out\").read_text() == \"out\"\n\n\ndef test_stage_cache_params(tmp_dir, dvc, mocker):\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\\nbar: 2\")\n    tmp_dir.gen(\"myparams.yaml\", \"baz: 3\\nqux: 4\")\n    tmp_dir.gen(\"script.py\", 'open(\"out\", \"w+\").write(\"out\"); ')\n    stage = dvc.run(\n        cmd=\"python script.py\",\n        params=[\"foo,bar\", \"myparams.yaml:baz,qux\"],\n        outs=[\"out\"],\n        name=\"write-out\",\n    )\n\n    with dvc.lock:\n        stage.remove(remove_outs=True, force=True)\n\n    assert not (tmp_dir / \"out\").exists()\n    assert not (tmp_dir / \"dvc.yaml\").exists()\n    assert not 
(tmp_dir / \"dvc.lock\").exists()\n\n    cache_dir = os.path.join(\n        dvc.stage_cache.cache_dir,\n        \"8f\",\n        \"8fdb377d1b4c0a303b788771b122dfba9bbbbc43f14ce41d35715cf4fea08459\",\n    )\n    cache_file = os.path.join(\n        cache_dir, \"9ce1963a69beb1299800188647cd960b7afff101be19fd46226e32bb8be8ee44\"\n    )\n\n    assert os.path.isdir(cache_dir)\n    assert os.listdir(cache_dir) == [os.path.basename(cache_file)]\n    assert os.path.isfile(cache_file)\n\n    run_spy = mocker.patch(\"dvc.stage.run.cmd_run\")\n    checkout_spy = mocker.spy(dvc_output, \"checkout\")\n    with dvc.lock:\n        stage.run()\n\n    assert not run_spy.called\n    assert checkout_spy.call_count == 2\n\n    assert (tmp_dir / \"out\").exists()\n    assert (tmp_dir / \"out\").read_text() == \"out\"\n\n\ndef test_stage_cache_wdir(tmp_dir, dvc, mocker):\n    tmp_dir.gen(\"dep\", \"dep\")\n    tmp_dir.gen(\"script.py\", 'open(\"out\", \"w+\").write(\"out\"); ')\n    tmp_dir.gen({\"wdir\": {}})\n    stage = dvc.run(\n        cmd=\"python ../script.py\",\n        deps=[\"../script.py\", \"../dep\"],\n        outs=[\"out\"],\n        name=\"write-out\",\n        wdir=\"wdir\",\n    )\n\n    with dvc.lock:\n        stage.remove(remove_outs=True, force=True)\n\n    assert not (tmp_dir / \"wdir\" / \"out\").exists()\n    assert not (tmp_dir / \"wdir\" / \"dvc.yaml\").exists()\n    assert not (tmp_dir / \"wdir\" / \"dvc.lock\").exists()\n\n    cache_dir = os.path.join(\n        dvc.stage_cache.cache_dir,\n        \"6a\",\n        \"6ad5cce3347e9e96c77d4353d84e5c8cae8c9151c486c4ea3d3d79e9051800f1\",\n    )\n    cache_file = os.path.join(\n        cache_dir, \"1fa3e1a9f785f8364ad185d62bd0813ce8794afcfc79410e985eac0443cc5462\"\n    )\n\n    assert os.path.isdir(cache_dir)\n    assert os.listdir(cache_dir) == [os.path.basename(cache_file)]\n    assert os.path.isfile(cache_file)\n\n    run_spy = mocker.patch(\"dvc.stage.run.cmd_run\")\n    checkout_spy = mocker.spy(dvc_output, 
\"checkout\")\n    with dvc.lock:\n        stage.run()\n\n    assert not run_spy.called\n    assert checkout_spy.call_count == 2\n\n    assert (tmp_dir / \"wdir\" / \"out\").exists()\n    assert (tmp_dir / \"wdir\" / \"out\").read_text() == \"out\"\n\n\ndef test_shared_stage_cache(tmp_dir, dvc, run_copy):\n    import stat\n\n    from dvc.cachemgr import CacheManager\n\n    tmp_dir.gen(\"foo\", \"foo\")\n\n    with dvc.config.edit() as config:\n        config[\"cache\"][\"shared\"] = \"group\"\n\n    dvc.cache = CacheManager(dvc)\n\n    assert not os.path.exists(dvc.cache.local.path)\n\n    run_copy(\"foo\", \"bar\", name=\"copy-foo-bar\")\n\n    parent_cache_dir = os.path.join(dvc.stage_cache.cache_dir, \"6d\")\n    cache_dir = os.path.join(\n        parent_cache_dir,\n        \"6d4c6de74e7c0d60d2122e2063b4724a8c78a6799def6c7cf5093f45e7f2f3b7\",\n    )\n    cache_file = os.path.join(\n        cache_dir, \"435e34f80692059e79ec53346cbfe29fd9ee65b62f5f06f17f5950ce5a7408ea\"\n    )\n\n    # sanity check\n    assert os.path.isdir(cache_dir)\n    assert os.listdir(cache_dir) == [os.path.basename(cache_file)]\n    assert os.path.isfile(cache_file)\n\n    def _mode(path):\n        return stat.S_IMODE(os.stat(path).st_mode)\n\n    if os.name == \"nt\":\n        dir_mode = 0o777\n        file_mode = 0o666\n    else:\n        dir_mode = 0o2775\n        file_mode = 0o664\n\n    assert _mode(dvc.cache.local.path) == dir_mode\n    assert _mode(dvc.stage_cache.cache_dir) == dir_mode\n    assert _mode(parent_cache_dir) == dir_mode\n    assert _mode(cache_dir) == dir_mode\n    assert _mode(cache_file) == file_mode\n\n\n@pytest.mark.parametrize(\n    \"kwargs\",\n    [\n        {},\n        {\"cmd\": \"cmd\"},\n        {\"cmd\": \"cmd\", \"deps\": [\"path\"]},\n        {\"cmd\": \"cmd\", \"outs\": [\"path\"]},\n        {\"always_changed\": True},\n    ],\n)\ndef test_unhashable(tmp_dir, dvc, mocker, kwargs):\n    from dvc.stage import Stage, create_stage\n    from dvc.stage.cache 
import RunCacheNotFoundError, StageCache\n\n    cache = StageCache(dvc)\n    stage = create_stage(Stage, path=\"stage.dvc\", repo=dvc, **kwargs)\n    get_stage_hash = mocker.patch(\"dvc.stage.cache._get_stage_hash\")\n    assert cache.save(stage) is None\n    get_stage_hash.assert_not_called()\n    with pytest.raises(RunCacheNotFoundError):\n        cache.restore(stage)\n    get_stage_hash.assert_not_called()\n"
  },
  {
    "path": "tests/unit/stage/test_loader_pipeline_file.py",
    "content": "import os\nfrom copy import deepcopy\nfrom itertools import chain\n\nimport pytest\n\nfrom dvc.dvcfile import PROJECT_FILE, load_file\nfrom dvc.stage import PipelineStage, create_stage\nfrom dvc.stage.loader import StageLoader\nfrom dvc.stage.serialize import split_params_deps\nfrom dvc_data.hashfile.hash_info import HashInfo\n\n\n@pytest.fixture\ndef stage_data():\n    return {\"cmd\": \"command\", \"deps\": [\"foo\"], \"outs\": [\"bar\"]}\n\n\n@pytest.fixture\ndef lock_data():\n    return {\n        \"cmd\": \"command\",\n        \"deps\": [{\"path\": \"foo\", \"md5\": \"foo_checksum\", \"hash\": \"md5\"}],\n        \"outs\": [{\"path\": \"bar\", \"md5\": \"bar_checksum\", \"hash\": \"md5\"}],\n    }\n\n\ndef test_fill_from_lock_deps_outs(dvc, lock_data):\n    stage = create_stage(PipelineStage, dvc, PROJECT_FILE, deps=[\"foo\"], outs=[\"bar\"])\n\n    for item in chain(stage.deps, stage.outs):\n        assert not item.hash_info\n\n    StageLoader.fill_from_lock(stage, lock_data)\n\n    assert stage.deps[0].hash_info == HashInfo(\"md5\", \"foo_checksum\")\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", \"bar_checksum\")\n\n\ndef test_fill_from_lock_outs_isexec(dvc):\n    stage = create_stage(PipelineStage, dvc, PROJECT_FILE, outs=[\"foo\"])\n\n    assert not stage.outs[0].meta.isexec\n\n    StageLoader.fill_from_lock(\n        stage,\n        {\n            \"cmd\": \"command\",\n            \"outs\": [\n                {\"path\": \"foo\", \"md5\": \"foo_checksum\", \"isexec\": True, \"hash\": \"md5\"}\n            ],\n        },\n    )\n\n    assert stage.outs[0].def_path == \"foo\"\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", \"foo_checksum\")\n    assert stage.outs[0].meta.isexec\n\n\ndef test_fill_from_lock_params(dvc, lock_data):\n    stage = create_stage(\n        PipelineStage,\n        dvc,\n        PROJECT_FILE,\n        deps=[\"foo\"],\n        outs=[\"bar\"],\n        params=[\n            \"lorem\",\n            
\"lorem.ipsum\",\n            {\"myparams.yaml\": [\"ipsum\", \"foobar\"]},\n        ],\n    )\n    lock_data[\"params\"] = {\n        \"params.yaml\": {\n            \"lorem\": \"lorem\",\n            \"lorem.ipsum\": [\"i\", \"p\", \"s\", \"u\", \"m\"],\n        },\n        \"myparams.yaml\": {\n            # missing value in lock for `foobar` params\n            \"ipsum\": \"ipsum\"\n        },\n    }\n    params_deps = split_params_deps(stage)[0]\n    assert set(params_deps[0].params) == {\"lorem\", \"lorem.ipsum\"}\n    assert set(params_deps[1].params) == {\"ipsum\", \"foobar\"}\n    assert not params_deps[0].hash_info\n    assert not params_deps[1].hash_info\n\n    StageLoader.fill_from_lock(stage, lock_data)\n    assert params_deps[0].hash_info.value == lock_data[\"params\"][\"params.yaml\"]\n    assert params_deps[1].hash_info.value == lock_data[\"params\"][\"myparams.yaml\"]\n\n\ndef test_fill_from_lock_missing_params_section(dvc, lock_data):\n    stage = create_stage(\n        PipelineStage,\n        dvc,\n        PROJECT_FILE,\n        deps=[\"foo\"],\n        outs=[\"bar\"],\n        params=[\"lorem\", \"lorem.ipsum\", {\"myparams.yaml\": [\"ipsum\"]}],\n    )\n    params_deps = split_params_deps(stage)[0]\n    StageLoader.fill_from_lock(stage, lock_data)\n    assert not params_deps[0].hash_info\n    assert not params_deps[1].hash_info\n\n\ndef test_fill_from_lock_missing_checksums(dvc, lock_data):\n    stage = create_stage(\n        PipelineStage,\n        dvc,\n        PROJECT_FILE,\n        deps=[\"foo\", \"foo1\"],\n        outs=[\"bar\", \"bar1\"],\n    )\n\n    StageLoader.fill_from_lock(stage, lock_data)\n\n    assert stage.deps[0].hash_info == HashInfo(\"md5\", \"foo_checksum\")\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", \"bar_checksum\")\n    assert not stage.deps[1].hash_info\n    assert not stage.outs[1].hash_info\n\n\ndef test_fill_from_lock_use_appropriate_checksum(dvc, lock_data):\n    stage = create_stage(\n        
PipelineStage,\n        dvc,\n        PROJECT_FILE,\n        deps=[\"s3://dvc-temp/foo\"],\n        outs=[\"bar\"],\n    )\n    lock_data[\"deps\"] = [{\"path\": \"s3://dvc-temp/foo\", \"etag\": \"e-tag\"}]\n    StageLoader.fill_from_lock(stage, lock_data)\n    assert stage.deps[0].hash_info == HashInfo(\"etag\", \"e-tag\")\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", \"bar_checksum\")\n\n\ndef test_fill_from_lock_with_missing_sections(dvc, lock_data):\n    stage = create_stage(PipelineStage, dvc, PROJECT_FILE, deps=[\"foo\"], outs=[\"bar\"])\n    lock = deepcopy(lock_data)\n    del lock[\"deps\"]\n    StageLoader.fill_from_lock(stage, lock)\n    assert not stage.deps[0].hash_info\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", \"bar_checksum\")\n\n    lock = deepcopy(lock_data)\n    del lock[\"outs\"]\n    StageLoader.fill_from_lock(stage, lock)\n    assert stage.deps[0].hash_info == HashInfo(\"md5\", \"foo_checksum\")\n    assert not stage.outs[0].hash_info\n\n\ndef test_fill_from_lock_empty_data(dvc):\n    stage = create_stage(PipelineStage, dvc, PROJECT_FILE, deps=[\"foo\"], outs=[\"bar\"])\n    StageLoader.fill_from_lock(stage, None)\n    assert not stage.deps[0].hash_info\n    assert not stage.outs[0].hash_info\n    StageLoader.fill_from_lock(stage, {})\n    assert not stage.deps[0].hash_info\n    assert not stage.outs[0].hash_info\n\n\ndef test_load_stage(dvc, stage_data, lock_data):\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data, lock_data)\n\n    assert stage.wdir == os.path.abspath(os.curdir)\n    assert stage.name == \"stage-1\"\n    assert stage.cmd == \"command\"\n    assert stage.path == os.path.abspath(PROJECT_FILE)\n    assert stage.deps[0].def_path == \"foo\"\n    assert stage.deps[0].hash_info == HashInfo(\"md5\", \"foo_checksum\")\n    assert stage.outs[0].def_path == \"bar\"\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", \"bar_checksum\")\n\n\ndef 
test_load_stage_cmd_with_list(dvc, stage_data, lock_data):\n    stage_data[\"cmd\"] = [\"cmd-0\", \"cmd-1\"]\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data, lock_data)\n    assert stage.cmd == [\"cmd-0\", \"cmd-1\"]\n\n\ndef test_load_stage_outs_with_flags(dvc, stage_data, lock_data):\n    stage_data[\"outs\"] = [{\"foo\": {\"cache\": False}}]\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data, lock_data)\n    assert stage.outs[0].use_cache is False\n\n\ndef test_load_stage_no_lock(dvc, stage_data):\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data)\n    assert stage.deps[0].def_path == \"foo\"\n    assert stage.outs[0].def_path == \"bar\"\n    assert not stage.deps[0].hash_info\n    assert not stage.outs[0].hash_info\n\n\ndef test_load_stage_with_params(dvc, stage_data, lock_data):\n    lock_data[\"params\"] = {\"params.yaml\": {\"lorem\": \"ipsum\"}}\n    stage_data[\"params\"] = [\"lorem\"]\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data, lock_data)\n\n    params, deps = split_params_deps(stage)\n    assert deps[0].def_path == \"foo\"\n    assert stage.outs[0].def_path == \"bar\"\n    assert params[0].def_path == \"params.yaml\"\n    assert params[0].hash_info == HashInfo(\"params\", {\"lorem\": \"ipsum\"})\n    assert deps[0].hash_info == HashInfo(\"md5\", \"foo_checksum\")\n    assert stage.outs[0].hash_info == HashInfo(\"md5\", \"bar_checksum\")\n\n\n@pytest.mark.parametrize(\"typ\", [\"metrics\", \"plots\"])\ndef test_load_stage_with_metrics_and_plots(dvc, stage_data, lock_data, typ):\n    stage_data[typ] = stage_data.pop(\"outs\")\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data, lock_data)\n\n    assert stage.outs[0].def_path == \"bar\"\n    
assert stage.outs[0].hash_info == HashInfo(\"md5\", \"bar_checksum\")\n\n\ndef test_load_changed_command(dvc, stage_data, lock_data):\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data)\n    assert not stage.cmd_changed\n    assert stage.cmd == \"command\"\n\n    lock_data[\"cmd\"] = \"different-command\"\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data, lock_data)\n    assert stage.cmd_changed\n    assert stage.cmd == \"command\"\n\n\ndef test_load_stage_wdir_and_path_correctly(dvc, stage_data, lock_data):\n    stage_data[\"wdir\"] = \"dir\"\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    stage = StageLoader.load_stage(dvcfile, \"stage-1\", stage_data, lock_data)\n\n    assert stage.wdir == os.path.abspath(\"dir\")\n    assert stage.path == os.path.abspath(PROJECT_FILE)\n\n\ndef test_load_stage_mapping(dvc, stage_data, lock_data):\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    dvcfile.contents = {\"stages\": {\"stage\": stage_data}}\n    dvcfile.lockfile_contents = {\"stage\": lock_data}\n\n    assert len(dvcfile.stages) == 1\n    assert \"stage\" in dvcfile.stages\n    assert \"stage1\" not in dvcfile.stages\n    assert dvcfile.stages.keys() == {\"stage\"}\n    assert isinstance(dvcfile.stages[\"stage\"], PipelineStage)\n\n\ndef test_fill_from_lock_dos2unix(dvc):\n    lock_data = {\n        \"cmd\": \"command\",\n        \"deps\": [{\"path\": \"foo\", \"md5\": \"foo_checksum\"}],\n        \"outs\": [{\"path\": \"bar\", \"md5\": \"bar_checksum\"}],\n    }\n    stage = create_stage(PipelineStage, dvc, PROJECT_FILE, deps=[\"foo\"], outs=[\"bar\"])\n\n    for item in chain(stage.deps, stage.outs):\n        assert not item.hash_info\n\n    StageLoader.fill_from_lock(stage, lock_data)\n\n    assert stage.deps[0].hash_info == HashInfo(\"md5-dos2unix\", \"foo_checksum\")\n    assert stage.outs[0].hash_info == HashInfo(\"md5-dos2unix\", \"bar_checksum\")\n"
  },
  {
    "path": "tests/unit/stage/test_run.py",
    "content": "import logging\n\nimport pytest\n\nfrom dvc.stage import Stage\nfrom dvc.stage.run import run_stage\n\n\n@pytest.mark.parametrize(\n    \"cmd, expected\",\n    [\n        (\"mycmd arg1 arg2\", [\"> mycmd arg1 arg2\"]),\n        ([\"mycmd1 arg1\", \"mycmd2 arg2\"], [\"> mycmd1 arg1\", \"> mycmd2 arg2\"]),\n    ],\n)\ndef test_run_stage_dry(caplog, dvc, cmd, expected):\n    with caplog.at_level(level=logging.INFO, logger=\"dvc\"):\n        stage = Stage(dvc, \"stage.dvc\", cmd=cmd)\n        run_stage(stage, dry=True)\n\n    expected.insert(0, \"Running stage 'stage.dvc':\")\n    assert caplog.messages == expected\n"
  },
  {
    "path": "tests/unit/stage/test_serialize_pipeline_file.py",
    "content": "import os\n\nimport pytest\nfrom voluptuous import Schema as _Schema\n\nfrom dvc import output\nfrom dvc.dvcfile import PROJECT_FILE\nfrom dvc.schema import SINGLE_PIPELINE_STAGE_SCHEMA\nfrom dvc.stage import PipelineStage, create_stage\nfrom dvc.stage.serialize import to_pipeline_file as _to_pipeline_file\n\nkwargs = {\"name\": \"something\", \"cmd\": \"command\", \"path\": PROJECT_FILE}\nSchema = _Schema(SINGLE_PIPELINE_STAGE_SCHEMA)\n\n\ndef to_pipeline_file(stage):\n    \"\"\"Validate schema on each serialization.\"\"\"\n    e = _to_pipeline_file(stage)\n    assert len(Schema(e)) == 1\n    return e\n\n\ndef test_cmd(dvc):\n    stage = create_stage(PipelineStage, dvc, **kwargs)\n    entry = to_pipeline_file(stage)\n    assert entry == {\"something\": {\"cmd\": \"command\"}}\n\n\ndef test_wdir(dvc):\n    stage = create_stage(PipelineStage, dvc, **kwargs)\n    assert stage.PARAM_WDIR not in to_pipeline_file(stage)[\"something\"]\n\n    stage.wdir = os.curdir\n    assert stage.PARAM_WDIR not in to_pipeline_file(stage)[\"something\"]\n\n    stage.wdir = \"some-dir\"\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_WDIR] == \"some-dir\"\n\n\ndef test_deps_sorted(dvc):\n    stage = create_stage(\n        PipelineStage, dvc, deps=[\"a\", \"quick\", \"lazy\", \"fox\"], **kwargs\n    )\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_DEPS] == [\n        \"a\",\n        \"fox\",\n        \"lazy\",\n        \"quick\",\n    ]\n\n\ndef test_outs_sorted(dvc):\n    stage = create_stage(\n        PipelineStage,\n        dvc,\n        outs=[\"too\", \"many\", \"outs\"],\n        deps=[\"foo\"],\n        **kwargs,\n    )\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_OUTS] == [\n        \"many\",\n        \"outs\",\n        \"too\",\n    ]\n\n\ndef test_params_sorted(dvc):\n    params = [\n        \"lorem\",\n        \"ipsum\",\n        {\"custom.yaml\": [\"wxyz\", \"pqrs\", \"baz\"]},\n        {\"params.yaml\": 
[\"barr\"]},\n    ]\n    stage = create_stage(\n        PipelineStage, dvc, outs=[\"bar\"], deps=[\"foo\"], params=params, **kwargs\n    )\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_PARAMS] == [\n        \"barr\",\n        \"ipsum\",\n        \"lorem\",\n        {\"custom.yaml\": [\"baz\", \"pqrs\", \"wxyz\"]},\n    ]\n\n\ndef test_params_file_sorted(dvc):\n    params = [\n        \"lorem\",\n        \"ipsum\",\n        {\"custom.yaml\": [\"wxyz\", \"pqrs\", \"baz\"]},\n        {\"a-file-of-params.yaml\": [\"barr\"]},\n    ]\n    stage = create_stage(\n        PipelineStage, dvc, outs=[\"bar\"], deps=[\"foo\"], params=params, **kwargs\n    )\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_PARAMS] == [\n        \"ipsum\",\n        \"lorem\",\n        {\"a-file-of-params.yaml\": [\"barr\"]},\n        {\"custom.yaml\": [\"baz\", \"pqrs\", \"wxyz\"]},\n    ]\n\n\ndef test_params_file_without_targets(dvc):\n    params = [\n        \"foo\",\n        \"bar\",\n        {\"params.yaml\": None},\n        {\"custom.yaml\": [\"wxyz\", \"pqrs\", \"baz\"]},\n        {\"a-file-of-params.yaml\": None},\n        {\"a-file-of-params.yaml\": [\"barr\"]},\n    ]\n    stage = create_stage(\n        PipelineStage, dvc, outs=[\"bar\"], deps=[\"foo\"], params=params, **kwargs\n    )\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_PARAMS] == [\n        {\"a-file-of-params.yaml\": None},\n        {\"custom.yaml\": [\"baz\", \"pqrs\", \"wxyz\"]},\n        {\"params.yaml\": None},\n    ]\n\n\n@pytest.mark.parametrize(\n    \"typ, extra\",\n    [(\"plots\", {\"plot\": True}), (\"metrics\", {\"metric\": True}), (\"outs\", {})],\n)\ndef test_outs_and_outs_flags_are_sorted(dvc, typ, extra):\n    stage = create_stage(PipelineStage, dvc, deps=[\"input\"], **kwargs)\n    stage.outs += output.loads_from(stage, [\"barr\"], use_cache=False, **extra)\n    stage.outs += output.loads_from(\n        stage, [\"foobar\"], use_cache=False, persist=True, 
**extra\n    )\n    stage.outs += output.loads_from(stage, [\"foo\"], persist=True, **extra)\n    stage.outs += output.loads_from(stage, [\"bar\"], **extra)\n\n    serialized_outs = to_pipeline_file(stage)[\"something\"][typ]\n    assert serialized_outs == [\n        \"bar\",\n        {\"barr\": {\"cache\": False}},\n        {\"foo\": {\"persist\": True}},\n        {\"foobar\": {\"cache\": False, \"persist\": True}},\n    ]\n    assert list(serialized_outs[3][\"foobar\"].keys()) == [\"cache\", \"persist\"]\n\n\ndef test_plot_props(dvc):\n    props = {\"x\": \"1\"}\n    stage = create_stage(PipelineStage, dvc, plots=[\"plot_file\"], **kwargs)\n    stage.outs[0].plot = props\n\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_PLOTS] == [\n        {\"plot_file\": props}\n    ]\n\n\ndef test_frozen(dvc):\n    stage = create_stage(PipelineStage, dvc, outs=[\"output\"], deps=[\"input\"], **kwargs)\n    assert stage.PARAM_FROZEN not in to_pipeline_file(stage)[\"something\"]\n\n    stage = create_stage(PipelineStage, dvc, **kwargs, frozen=True)\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_FROZEN] is True\n\n\ndef test_always_changed(dvc):\n    stage = create_stage(PipelineStage, dvc, outs=[\"output\"], deps=[\"input\"], **kwargs)\n    assert stage.PARAM_ALWAYS_CHANGED not in to_pipeline_file(stage)[\"something\"]\n\n    stage = create_stage(PipelineStage, dvc, **kwargs, always_changed=True)\n    assert to_pipeline_file(stage)[\"something\"][stage.PARAM_ALWAYS_CHANGED] is True\n\n\ndef test_order(dvc):\n    stage = create_stage(\n        PipelineStage,\n        dvc,\n        outs=[\"output\"],\n        deps=[\"input\"],\n        **kwargs,\n        always_changed=True,\n        frozen=True,\n    )\n    # `create_stage` checks for existence of `wdir`\n    stage.wdir = \"some-dir\"\n    assert list(to_pipeline_file(stage)[\"something\"].keys()) == [\n        \"cmd\",\n        \"wdir\",\n        \"deps\",\n        \"outs\",\n        \"frozen\",\n  
      \"always_changed\",\n    ]\n\n\n@pytest.mark.parametrize(\"typ\", [\"outs\", \"metrics\", \"plots\", \"params\", \"deps\", None])\ndef test_order_deps_outs(dvc, typ):\n    all_types = [\"deps\", \"params\", \"outs\", \"metrics\", \"plots\"]\n    all_types = [item for item in all_types if item != typ]\n    extra = {key: [f\"foo-{i}\"] for i, key in enumerate(all_types)}\n\n    stage = create_stage(PipelineStage, dvc, **kwargs, **extra)\n    assert typ not in to_pipeline_file(stage)[\"something\"]\n    assert list(to_pipeline_file(stage)[\"something\"].keys()) == [\"cmd\", *all_types]\n"
  },
  {
    "path": "tests/unit/stage/test_serialize_pipeline_lock.py",
    "content": "from collections import OrderedDict\n\nimport pytest\nfrom voluptuous import Schema as _Schema\n\nfrom dvc.dvcfile import PROJECT_FILE\nfrom dvc.schema import LOCK_FILE_STAGE_SCHEMA, LOCKFILE_STAGES_SCHEMA\nfrom dvc.stage import PipelineStage, create_stage\nfrom dvc.stage.serialize import DEFAULT_PARAMS_FILE, to_lockfile\nfrom dvc.stage.serialize import to_single_stage_lockfile as _to_single_stage_lockfile\nfrom dvc.stage.utils import split_params_deps\nfrom dvc_data.hashfile.hash_info import HashInfo\n\nkwargs = {\"name\": \"something\", \"cmd\": \"command\", \"path\": PROJECT_FILE}\nSchema = _Schema(LOCK_FILE_STAGE_SCHEMA)\n\n\ndef to_single_stage_lockfile(stage):\n    \"\"\"Validate schema on each serialization.\"\"\"\n    e = _to_single_stage_lockfile(stage)\n    assert Schema(e)\n    return e\n\n\ndef test_lock(dvc):\n    stage = create_stage(PipelineStage, dvc, **kwargs)\n    assert to_single_stage_lockfile(stage) == {\"cmd\": \"command\"}\n\n\ndef test_lock_deps(dvc):\n    stage = create_stage(PipelineStage, dvc, deps=[\"input\"], **kwargs)\n    stage.deps[0].hash_info = HashInfo(\"md5\", \"md-five\")\n    assert to_single_stage_lockfile(stage) == OrderedDict(\n        [\n            (\"cmd\", \"command\"),\n            (\n                \"deps\",\n                [OrderedDict([(\"hash\", \"md5\"), (\"path\", \"input\"), (\"md5\", \"md-five\")])],\n            ),\n        ]\n    )\n\n\ndef test_lock_deps_order(dvc):\n    stage = create_stage(PipelineStage, dvc, deps=[\"input1\", \"input0\"], **kwargs)\n    stage.deps[0].hash_info = HashInfo(\"md5\", \"md-one1\")\n    stage.deps[1].hash_info = HashInfo(\"md5\", \"md-zer0\")\n    assert to_single_stage_lockfile(stage) == OrderedDict(\n        [\n            (\"cmd\", \"command\"),\n            (\n                \"deps\",\n                [\n                    OrderedDict(\n                        [(\"hash\", \"md5\"), (\"path\", \"input0\"), (\"md5\", \"md-zer0\")]\n                    ),\n  
                  OrderedDict(\n                        [(\"hash\", \"md5\"), (\"path\", \"input1\"), (\"md5\", \"md-one1\")]\n                    ),\n                ],\n            ),\n        ]\n    )\n\n\ndef test_lock_params(dvc):\n    stage = create_stage(PipelineStage, dvc, params=[\"lorem.ipsum\", \"abc\"], **kwargs)\n    stage.deps[0].hash_info = HashInfo(\n        \"params\", {\"lorem.ipsum\": {\"lorem1\": 1, \"lorem2\": 2}, \"abc\": 3}\n    )\n    assert to_single_stage_lockfile(stage)[\"params\"][\n        DEFAULT_PARAMS_FILE\n    ] == OrderedDict([(\"abc\", 3), (\"lorem.ipsum\", {\"lorem1\": 1, \"lorem2\": 2})])\n\n\ndef test_lock_params_file_sorted(dvc):\n    stage = create_stage(\n        PipelineStage,\n        dvc,\n        params=[\n            \"lorem.ipsum\",\n            \"abc\",\n            {\"myparams.yaml\": [\"foo\", \"foobar\"]},\n            {\"a-params-file.yaml\": [\"bar\", \"barr\"]},\n        ],\n        **kwargs,\n    )\n    stage.deps[0].hash_info = HashInfo(\n        \"params\", {\"lorem.ipsum\": {\"lorem1\": 1, \"lorem2\": 2}, \"abc\": 3}\n    )\n    stage.deps[1].hash_info = HashInfo(\n        \"params\", {\"foo\": [\"f\", \"o\", \"o\"], \"foobar\": \"foobar\"}\n    )\n    stage.deps[2].hash_info = HashInfo(\n        \"params\", {\"bar\": [\"b\", \"a\", \"r\"], \"barr\": \"barr\"}\n    )\n    assert to_single_stage_lockfile(stage)[\"params\"] == OrderedDict(\n        [\n            (\n                DEFAULT_PARAMS_FILE,\n                OrderedDict([(\"abc\", 3), (\"lorem.ipsum\", {\"lorem1\": 1, \"lorem2\": 2})]),\n            ),\n            (\n                \"a-params-file.yaml\",\n                OrderedDict([(\"bar\", [\"b\", \"a\", \"r\"]), (\"barr\", \"barr\")]),\n            ),\n            (\n                \"myparams.yaml\",\n                OrderedDict([(\"foo\", [\"f\", \"o\", \"o\"]), (\"foobar\", \"foobar\")]),\n            ),\n        ]\n    )\n\n\ndef test_lock_params_no_values_filled(dvc):\n    stage = 
create_stage(PipelineStage, dvc, params=[\"lorem.ipsum\", \"abc\"], **kwargs)\n    assert to_single_stage_lockfile(stage) == {\"cmd\": \"command\"}\n\n\n@pytest.mark.parametrize(\n    \"info, expected\",\n    [\n        (None, {}),\n        ({}, {}),\n        ({\"foo\": \"foo\", \"bar\": \"bar\"}, {\"bar\": \"bar\", \"foo\": \"foo\"}),\n    ],\n)\ndef test_lock_params_without_targets(dvc, info, expected):\n    stage = create_stage(PipelineStage, dvc, params=[{\"params.yaml\": None}], **kwargs)\n    stage.deps[0].fill_values(info)\n    assert to_single_stage_lockfile(stage) == {\n        \"cmd\": \"command\",\n        \"params\": {\"params.yaml\": OrderedDict(expected)},\n    }\n\n\n@pytest.mark.parametrize(\"typ\", [\"plots\", \"metrics\", \"outs\"])\ndef test_lock_outs(dvc, typ):\n    stage = create_stage(PipelineStage, dvc, **{typ: [\"input\"]}, **kwargs)\n    stage.outs[0].hash_info = HashInfo(\"md5\", \"md-five\")\n    assert to_single_stage_lockfile(stage) == OrderedDict(\n        [\n            (\"cmd\", \"command\"),\n            (\n                \"outs\",\n                [OrderedDict([(\"hash\", \"md5\"), (\"path\", \"input\"), (\"md5\", \"md-five\")])],\n            ),\n        ]\n    )\n\n\n@pytest.mark.parametrize(\"typ\", [\"plots\", \"metrics\", \"outs\"])\ndef test_lock_outs_isexec(dvc, typ):\n    stage = create_stage(PipelineStage, dvc, **{typ: [\"input\"]}, **kwargs)\n    stage.outs[0].hash_info = HashInfo(\"md5\", \"md-five\")\n    stage.outs[0].meta.isexec = True\n    assert to_single_stage_lockfile(stage) == OrderedDict(\n        [\n            (\"cmd\", \"command\"),\n            (\n                \"outs\",\n                [\n                    OrderedDict(\n                        [\n                            (\"hash\", \"md5\"),\n                            (\"path\", \"input\"),\n                            (\"md5\", \"md-five\"),\n                            (\"isexec\", True),\n                        ]\n                    )\n      
          ],\n            ),\n        ]\n    )\n\n\n@pytest.mark.parametrize(\"typ\", [\"plots\", \"metrics\", \"outs\"])\ndef test_lock_outs_order(dvc, typ):\n    stage = create_stage(PipelineStage, dvc, **{typ: [\"input1\", \"input0\"]}, **kwargs)\n    stage.outs[0].hash_info = HashInfo(\"md5\", \"md-one1\")\n    stage.outs[1].hash_info = HashInfo(\"md5\", \"md-zer0\")\n    assert to_single_stage_lockfile(stage) == OrderedDict(\n        [\n            (\"cmd\", \"command\"),\n            (\n                \"outs\",\n                [\n                    OrderedDict(\n                        [(\"hash\", \"md5\"), (\"path\", \"input0\"), (\"md5\", \"md-zer0\")]\n                    ),\n                    OrderedDict(\n                        [(\"hash\", \"md5\"), (\"path\", \"input1\"), (\"md5\", \"md-one1\")]\n                    ),\n                ],\n            ),\n        ]\n    )\n\n\ndef test_dump_nondefault_hash(dvc):\n    stage = create_stage(PipelineStage, dvc, deps=[\"s3://dvc-temp/file\"], **kwargs)\n    stage.deps[0].hash_info = HashInfo(\"md5\", \"value\")\n    assert to_single_stage_lockfile(stage) == OrderedDict(\n        [\n            (\"cmd\", \"command\"),\n            (\n                \"deps\",\n                [\n                    OrderedDict(\n                        [\n                            (\"hash\", \"md5\"),\n                            (\"path\", \"s3://dvc-temp/file\"),\n                            (\"md5\", \"value\"),\n                        ]\n                    )\n                ],\n            ),\n        ]\n    )\n\n\ndef test_order(dvc):\n    stage = create_stage(\n        PipelineStage,\n        dvc,\n        deps=[\"input\"],\n        outs=[\"output\"],\n        params=[\"foo-param\"],\n        **kwargs,\n    )\n    params, deps = split_params_deps(stage)\n\n    deps[0].hash_info = HashInfo(\"md5\", \"md-five\")\n    params[0].hash_info = HashInfo(\"params\", {\"foo-param\": \"value\"})\n    
stage.outs[0].hash_info = HashInfo(\"md5\", \"md5-output\")\n\n    assert to_single_stage_lockfile(stage) == OrderedDict(\n        [\n            (\"cmd\", \"command\"),\n            (\"deps\", [{\"hash\": \"md5\", \"path\": \"input\", \"md5\": \"md-five\"}]),\n            (\"params\", {\"params.yaml\": {\"foo-param\": \"value\"}}),\n            (\"outs\", [{\"hash\": \"md5\", \"path\": \"output\", \"md5\": \"md5-output\"}]),\n        ]\n    )\n\n\ndef test_to_lockfile(dvc):\n    stage = create_stage(PipelineStage, dvc, deps=[\"input\"], **kwargs)\n    stage.deps[0].hash_info = HashInfo(\"md5\", \"md-five\")\n    entry = to_lockfile(stage)\n    assert len(entry) == 1\n    _Schema(LOCKFILE_STAGES_SCHEMA)(entry)\n    assert entry == {\n        \"something\": OrderedDict(\n            [\n                (\"cmd\", \"command\"),\n                (\"deps\", [{\"hash\": \"md5\", \"path\": \"input\", \"md5\": \"md-five\"}]),\n            ]\n        )\n    }\n\n\ndef test_to_single_stage_lockfile_cloud_versioning_dir(dvc):\n    stage = create_stage(PipelineStage, dvc, outs=[\"dir\"], **kwargs)\n    stage.outs[0].hash_info = HashInfo(\"md5\", \"md-five.dir\")\n    files = [\n        {\n            \"size\": 3,\n            \"version_id\": \"WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h\",\n            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"relpath\": \"bar\",\n        },\n        {\n            \"size\": 3,\n            \"version_id\": \"0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0\",\n            \"etag\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"md5\": \"acbd18db4cc2f85cedef654fccc4a4d8\",\n            \"relpath\": \"foo\",\n        },\n    ]\n    stage.outs[0].files = files\n    e = _to_single_stage_lockfile(stage, with_files=True)\n    assert Schema(e)\n    assert e[\"outs\"][0] == {\"hash\": \"md5\", \"path\": \"dir\", \"files\": files}\n"
  },
  {
    "path": "tests/unit/stage/test_stage.py",
    "content": "import os\nimport signal\nimport subprocess\nimport threading\n\nimport pytest\n\nfrom dvc.dependency.repo import RepoDependency\nfrom dvc.stage import Stage\nfrom dvc.stage.exceptions import StageUpdateError\n\nTEST_STAGE_DICT = {\n    \"md5\": \"123456\",\n    \"cmd\": \"mycmd\",\n    \"outs\": [{\"path\": \"a\", \"md5\": \"123456789\"}],\n    \"deps\": [{\"path\": \"b\", \"md5\": \"987654321\"}],\n}\n\n\ndef test_stage_checksum(mocker):\n    stage = Stage(None, \"path\", cmd=\"mycmd\")\n\n    mocker.patch.object(stage, \"dumpd\", return_value=TEST_STAGE_DICT)\n    assert stage.compute_md5() == \"e9521a22111493406ea64a88cda63e0b\"\n\n\ndef test_wdir_default_ignored(mocker):\n    stage = Stage(None, \"path\", cmd=\"mycmd\")\n    d = dict(TEST_STAGE_DICT, wdir=\".\")\n\n    mocker.patch.object(stage, \"dumpd\", return_value=d)\n    assert stage.compute_md5() == \"e9521a22111493406ea64a88cda63e0b\"\n\n\ndef test_wdir_non_default_is_not_ignored(mocker):\n    stage = Stage(None, \"path\", cmd=\"mycmd\")\n    d = dict(TEST_STAGE_DICT, wdir=\"..\")\n\n    mocker.patch.object(stage, \"dumpd\", return_value=d)\n    assert stage.compute_md5() == \"2ceba15e87f6848aa756502c1e6d24e9\"\n\n\ndef test_meta_ignored(mocker):\n    stage = Stage(None, \"path\", cmd=\"mycmd\")\n    d = dict(TEST_STAGE_DICT, meta={\"author\": \"Suor\"})\n\n    mocker.patch.object(stage, \"dumpd\", return_value=d)\n    assert stage.compute_md5() == \"e9521a22111493406ea64a88cda63e0b\"\n\n\ndef test_path_conversion(dvc):\n    stage = Stage(dvc, \"path\")\n\n    stage.wdir = os.path.join(\"..\", \"..\")\n    assert stage.dumpd()[\"wdir\"] == \"../..\"\n\n\ndef test_stage_update(dvc, mocker):\n    stage = Stage(dvc, \"path\", \"cmd\")\n    dep = RepoDependency({\"url\": \"example.com\"}, stage, \"dep_path\")\n    mocker.patch.object(dep, \"update\", return_value=None)\n\n    stage = Stage(dvc, \"path\", deps=[dep])\n    reproduce = mocker.patch.object(stage, \"reproduce\")\n    
is_repo_import = mocker.patch(\n        __name__ + \".Stage.is_repo_import\", new_callable=mocker.PropertyMock\n    )\n\n    is_repo_import.return_value = True\n    with dvc.lock:\n        stage.update()\n    reproduce.assert_called_once_with(no_download=None, jobs=None, force=False)\n\n    is_repo_import.return_value = False\n    with pytest.raises(StageUpdateError):\n        stage.update()\n\n\n@pytest.mark.skipif(\n    not isinstance(threading.current_thread(), threading._MainThread),\n    reason=\"Not running in the main thread.\",\n)\ndef test_stage_run_ignore_sigint(dvc, mocker):\n    proc = mocker.Mock()\n    communicate = mocker.Mock()\n    proc.configure_mock(returncode=0, communicate=communicate)\n    popen = mocker.patch.object(subprocess, \"Popen\", return_value=proc)\n    signal_mock = mocker.patch(\"signal.signal\")\n\n    dvc.run(cmd=\"path\", name=\"train\")\n\n    popen.assert_called_once()\n    communicate.assert_called_once_with()\n    signal_mock.assert_any_call(signal.SIGINT, signal.SIG_IGN)\n    assert signal.getsignal(signal.SIGINT) == signal.default_int_handler\n\n\ndef test_always_changed(dvc):\n    stage = Stage(dvc, \"path\", always_changed=True)\n    stage.save()\n    with dvc.lock:\n        assert stage.changed()\n        assert stage.status()[\"path\"] == [\"always changed\"]\n\n\ndef test_external_outs(tmp_path_factory, dvc):\n    from dvc.stage import create_stage\n    from dvc.stage.exceptions import StageExternalOutputsError\n\n    tmp_path = tmp_path_factory.mktemp(\"external-outs\")\n    foo = tmp_path / \"foo\"\n    foo.write_text(\"foo\")\n\n    with pytest.raises(StageExternalOutputsError):\n        create_stage(Stage, dvc, \"path.dvc\", outs=[os.fspath(foo)])\n\n    with dvc.config.edit() as conf:\n        conf[\"remote\"][\"myremote\"] = {\"url\": os.fspath(tmp_path)}\n\n    with pytest.raises(StageExternalOutputsError):\n        create_stage(Stage, dvc, \"path.dvc\", outs=[\"remote://myremote/foo\"])\n\n    
create_stage(Stage, dvc, \"path.dvc\", outs_no_cache=[\"remote://myremote/foo\"])\n    create_stage(Stage, dvc, \"path.dvc\", outs_no_cache=[os.fspath(foo)])\n"
  },
  {
    "path": "tests/unit/stage/test_utils.py",
    "content": "import os\n\nfrom dvc.fs import localfs\nfrom dvc.stage.utils import _get_stage_files, resolve_paths\n\n\ndef test_resolve_paths():\n    p = os.path.join(\"dir\", \"subdir\")\n    file_path = os.path.join(p, \"dvc.yaml\")\n\n    path, wdir = resolve_paths(fs=localfs, path=file_path, wdir=\"dir\")\n    assert path == os.path.abspath(file_path)\n    assert wdir == os.path.abspath(os.path.join(p, \"dir\"))\n\n    path, wdir = resolve_paths(fs=localfs, path=file_path)\n    assert path == os.path.abspath(file_path)\n    assert wdir == os.path.abspath(p)\n\n    path, wdir = resolve_paths(fs=localfs, path=file_path, wdir=\"../../some-dir\")\n    assert path == os.path.abspath(file_path)\n    assert wdir == os.path.abspath(\"some-dir\")\n\n\ndef test_get_stage_files(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"dvc-dep\", \"dvc-dep\")\n    tmp_dir.gen(\"other-dep\", \"other-dep\")\n    stage = dvc.stage.create(\n        name=\"stage\",\n        cmd=\"foo\",\n        deps=[\"dvc-dep\", \"other-dep\"],\n        outs=[\"dvc-out\"],\n        outs_no_cache=[\"other-out\"],\n    )\n    assert _get_stage_files(stage) == [\"dvc.yaml\", \"dvc.lock\", \"other-dep\", \"other-out\"]\n\n\ndef test_get_stage_files_wdir(tmp_dir, dvc):\n    tmp_dir.gen({\"dir\": {\"dvc-dep\": \"dvc-dep\", \"other-dep\": \"other-dep\"}})\n    dvc.add(os.path.join(\"dir\", \"dvc-dep\"))\n    stage = dvc.stage.create(\n        name=\"stage\",\n        cmd=\"foo\",\n        wdir=\"dir\",\n        deps=[\"dvc-dep\", \"other-dep\"],\n        outs=[\"dvc-out\"],\n        outs_no_cache=[\"other-out\"],\n    )\n    assert _get_stage_files(stage) == [\n        \"dvc.yaml\",\n        \"dvc.lock\",\n        os.path.join(\"dir\", \"other-dep\"),\n        os.path.join(\"dir\", \"other-out\"),\n    ]\n"
  },
  {
    "path": "tests/unit/test_analytics.py",
    "content": "import hashlib\nimport json\nimport platform\n\nimport pytest\nfrom voluptuous import Any, Schema\n\nfrom dvc import analytics, env\nfrom dvc.cli import parse_args\n\n\n@pytest.fixture\ndef tmp_global_dir(mocker, tmp_path):\n    \"\"\"\n    Fixture to prevent modifying the actual global config\n    \"\"\"\n\n    def _user_config_dir(appname, *_args, **_kwargs):\n        return str(tmp_path / appname)\n\n    mocker.patch(\"iterative_telemetry.user_config_dir\", _user_config_dir)\n\n\ndef test_collect_and_send_report(mocker, tmp_global_dir):\n    mock_json = mocker.patch(\"json.dump\")\n    mock_daemon = mocker.patch(\"dvc.daemon._spawn\")\n    analytics.collect_and_send_report()\n    report = mock_json.call_args[0][0]\n\n    assert not report.get(\"cmd_class\")\n    assert not report.get(\"cmd_return_code\")\n\n    args = parse_args([\"add\", \"foo\"])\n    return_code = 0\n\n    analytics.collect_and_send_report(args, return_code)\n    report = mock_json.call_args[0][0]\n\n    assert report[\"cmd_class\"] == \"CmdAdd\"\n    assert report[\"cmd_return_code\"] == return_code\n\n    assert mock_daemon.call_count == 2\n\n\ndef test_runtime_info(tmp_global_dir):\n    schema = Schema(\n        {\n            \"dvc_version\": str,\n            \"is_binary\": bool,\n            \"scm_class\": Any(\"Git\", None),\n            \"user_id\": str,\n            \"system_info\": dict,\n            \"group_id\": Any(str, None),\n            \"remotes\": Any(list, None),\n            \"git_remote_hash\": Any(str, None),\n        },\n        required=True,\n    )\n\n    assert schema(analytics._runtime_info())\n\n\ndef test_send(monkeypatch, mocker, tmp_path):\n    monkeypatch.delenv(env.DVC_ANALYTICS_ENDPOINT, raising=False)\n    mock_post = mocker.patch(\"requests.post\")\n\n    import requests\n\n    url = \"https://analytics.dvc.org\"\n    report = {\"name\": \"dummy report\"}\n    report_file = tmp_path / \"report\"\n\n    
report_file.write_text(json.dumps(report))\n    mock_post.side_effect = requests.exceptions.RequestException\n\n    analytics.send(str(report_file))\n    assert mock_post.called\n    assert mock_post.call_args[0][0] == url\n    assert not report_file.exists()\n\n\n@pytest.mark.parametrize(\n    \"config, result\",\n    [\n        ({}, True),\n        ({\"analytics\": \"false\"}, False),\n        ({\"analytics\": \"true\"}, True),\n        ({\"unknown\": \"broken\"}, True),\n        ({\"analytics\": \"false\", \"unknown\": \"broken\"}, False),\n    ],\n)\ndef test_is_enabled(dvc, config, result, monkeypatch, tmp_global_dir):\n    with dvc.config.edit(validate=False) as conf:\n        conf[\"core\"] = config\n\n    # reset DVC_TEST env var, which affects `is_enabled()`\n    monkeypatch.delenv(\"DVC_TEST\")\n    monkeypatch.delenv(\"DVC_NO_ANALYTICS\", raising=False)\n\n    assert result == analytics.is_enabled()\n\n\n@pytest.mark.parametrize(\n    \"config, env, result\",\n    [\n        (None, None, True),\n        (None, \"true\", False),\n        (None, \"false\", False),  # only checking if env is set\n        (\"false\", None, False),\n        (\"false\", \"true\", False),\n        (\"false\", \"false\", False),\n        (\"true\", None, True),\n        (\"true\", \"true\", False),\n        (\"true\", \"false\", False),  # we checking if env is set\n    ],\n)\ndef test_is_enabled_env_neg(dvc, config, env, result, monkeypatch, tmp_global_dir):\n    # reset DVC_TEST env var, which affects `is_enabled()`\n    monkeypatch.delenv(\"DVC_TEST\")\n    monkeypatch.delenv(\"DVC_NO_ANALYTICS\", raising=False)\n\n    with dvc.config.edit() as conf:\n        conf[\"core\"] = {}\n\n    assert analytics.is_enabled()\n\n    if config is not None:\n        with dvc.config.edit() as conf:\n            conf[\"core\"] = {\"analytics\": config}\n\n    if env is not None:\n        monkeypatch.setenv(\"DVC_NO_ANALYTICS\", env)\n\n    assert result == analytics.is_enabled()\n\n\ndef 
test_system_info():\n    schema = Schema({\"os\": Any(\"windows\", \"mac\", \"linux\")}, required=True)\n\n    system = platform.system()\n\n    if system == \"Windows\":\n        schema = schema.extend(\n            {\n                \"windows_version_build\": int,\n                \"windows_version_major\": int,\n                \"windows_version_minor\": int,\n                \"windows_version_service_pack\": str,\n            }\n        )\n\n    if system == \"Darwin\":\n        schema = schema.extend({\"mac_version\": str})\n\n    if system == \"Linux\":\n        schema = schema.extend(\n            {\n                \"linux_distro\": str,\n                \"linux_distro_like\": str,\n                \"linux_distro_version\": str,\n            }\n        )\n\n    assert schema(analytics._system_info())\n\n\n@pytest.mark.parametrize(\n    \"git_remote\",\n    [\n        \"git://github.com/treeverse/dvc.git\",\n        \"git@github.com:treeverse/dvc.git\",\n        \"http://github.com/treeverse/dvc.git\",\n        \"https://github.com/treeverse/dvc.git\",\n        \"ssh://git@github.com/treeverse/dvc.git\",\n    ],\n)\ndef test_git_remote_hash(mocker, git_remote):\n    m = mocker.patch(\"dvc.analytics._git_remote_url\", return_value=git_remote)\n    expected = hashlib.md5(b\"treeverse/dvc.git\", usedforsecurity=False).hexdigest()\n\n    assert analytics._git_remote_path_hash(None) == expected\n    m.assert_called_once_with(None)\n\n\n@pytest.mark.parametrize(\n    \"git_remote\",\n    [\n        \"C:\\\\Users\\\\user\\\\dvc.git\",\n        \"/home/user/dvc.git\",\n        \"file:///home/user/dvc.git\",\n        \"./dvc.git\",\n    ],\n)\ndef test_git_remote_hash_local(mocker, git_remote):\n    m = mocker.patch(\"dvc.analytics._git_remote_url\", return_value=git_remote)\n\n    expected = hashlib.md5(\n        git_remote.encode(\"utf-8\"), usedforsecurity=False\n    ).hexdigest()\n    assert analytics._git_remote_path_hash(None) == expected\n    
m.assert_called_once_with(None)\n"
  },
  {
    "path": "tests/unit/test_api.py",
    "content": "import re\n\nimport pytest\n\nfrom dvc import api\n\n\ndef test_open_raises_error_if_no_context(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo-text\")\n\n    fd = api.open(\"foo\")\n    with pytest.raises(\n        AttributeError, match=re.escape(\"should be used in a with statement.\")\n    ):\n        fd.read()\n\n\ndef test_open_rev_raises_error_on_wrong_mode(tmp_dir, dvc):\n    tmp_dir.dvc_gen(\"foo\", \"foo-text\")\n\n    with pytest.raises(\n        ValueError, match=re.escape(\"Only reading `mode` is supported.\")\n    ):\n        with api.open(\"foo\", mode=\"w\"):\n            pass\n"
  },
  {
    "path": "tests/unit/test_collect.py",
    "content": "from dvc.repo.collect import collect\n\n\ndef test_collect_duplicates(tmp_dir, scm, dvc):\n    tmp_dir.gen(\"params.yaml\", \"foo: 1\\nbar: 2\")\n    tmp_dir.gen(\"foobar\", \"\")\n\n    dvc.run(name=\"stage-1\", cmd=\"echo stage-1\", params=[\"foo\"])\n    dvc.run(name=\"stage-2\", cmd=\"echo stage-2\", params=[\"bar\"])\n\n    outs, _ = collect(dvc, deps=True, targets=[\"params.yaml\"])\n    assert len(outs) == 1\n\n    outs, _ = collect(dvc, deps=True, targets=[\"params.yaml\"], duplicates=True)\n    assert len(outs) == 2\n\n    outs, _ = collect(dvc, deps=True, targets=[\"foobar\"], duplicates=True)\n    assert not outs\n"
  },
  {
    "path": "tests/unit/test_compare.py",
    "content": "import textwrap\n\nimport pytest\n\nfrom dvc.compare import diff_table, metrics_table, show_diff, show_metrics\nfrom dvc.utils.serialize import YAMLFileCorruptedError\n\n\n@pytest.mark.parametrize(\"title\", [\"Metric\", \"Param\"])\ndef test_diff_table(title):\n    td = diff_table(\n        {\"metrics.json\": {\"a.b.c\": {\"old\": 1, \"new\": 2, \"diff\": 3}}},\n        title=title,\n    )\n    assert td.as_dict() == [\n        {\n            \"Path\": \"metrics.json\",\n            title: \"a.b.c\",\n            \"HEAD\": \"1\",\n            \"workspace\": \"2\",\n            \"Change\": \"3\",\n        }\n    ]\n\n\ndef test_diff_table_with_value_column():\n    td = diff_table(\n        {\"metrics.json\": {\"a.b.c\": {\"old\": 1, \"new\": 2, \"diff\": 3}}},\n        title=\"Metric\",\n        old=False,\n    )\n    assert td.as_dict() == [\n        {\n            \"Path\": \"metrics.json\",\n            \"Metric\": \"a.b.c\",\n            \"Value\": \"2\",\n            \"Change\": \"3\",\n        }\n    ]\n\n\ndef test_no_path():\n    td = diff_table(\n        {\"metrics.json\": {\"a.b.c\": {\"old\": 1, \"new\": 2, \"diff\": 3}}},\n        title=\"Metric\",\n        no_path=True,\n    )\n    assert td.as_dict() == [\n        {\"Metric\": \"a.b.c\", \"HEAD\": \"1\", \"workspace\": \"2\", \"Change\": \"3\"}\n    ]\n\n\ndef test_do_not_show_changes():\n    td = diff_table(\n        {\"metrics.json\": {\"a.b.c\": {\"old\": 1, \"new\": 2, \"diff\": 3}}},\n        title=\"Metric\",\n        show_changes=False,\n    )\n    assert td.as_dict() == [\n        {\n            \"Path\": \"metrics.json\",\n            \"Metric\": \"a.b.c\",\n            \"HEAD\": \"1\",\n            \"workspace\": \"2\",\n        }\n    ]\n\n\ndef test_diff_table_precision():\n    diff = {\"metrics.json\": {\"a.b.c\": {\"old\": 1.1234, \"new\": 2.2345, \"diff\": 3.3456}}}\n    td = diff_table(diff, title=\"Metric\", precision=3)\n    assert td.as_dict() == [\n        {\n       
     \"Path\": \"metrics.json\",\n            \"Metric\": \"a.b.c\",\n            \"HEAD\": \"1.12\",\n            \"workspace\": \"2.23\",\n            \"Change\": \"3.35\",\n        }\n    ]\n\n\ndef test_diff_table_rounding():\n    diff = {\"metrics.json\": {\"a.b.c\": {\"old\": 1.1234, \"new\": 2.2345, \"diff\": 3.3456}}}\n    td = diff_table(diff, title=\"Metric\", precision=3, round_digits=True)\n    assert td.as_dict() == [\n        {\n            \"Path\": \"metrics.json\",\n            \"Metric\": \"a.b.c\",\n            \"HEAD\": \"1.123\",\n            \"workspace\": \"2.235\",\n            \"Change\": \"3.346\",\n        }\n    ]\n\n\n@pytest.mark.parametrize(\n    \"extra, expected\", [({\"on_empty_diff\": \"no diff\"}, \"no diff\"), ({}, \"-\")]\n)\ndef test_diff_unsupported_diff_message(extra, expected):\n    td = diff_table(\n        {\"metrics.json\": {\"\": {\"old\": \"1\", \"new\": \"2\"}}},\n        title=\"Metric\",\n        **extra,\n    )\n    assert td.as_dict() == [\n        {\n            \"Path\": \"metrics.json\",\n            \"Metric\": \"\",\n            \"HEAD\": \"1\",\n            \"workspace\": \"2\",\n            \"Change\": expected,\n        }\n    ]\n\n\ndef test_diff_new():\n    td = diff_table(\n        {\"param.json\": {\"a.b.d\": {\"old\": None, \"new\": \"new\"}}}, title=\"Param\"\n    )\n    assert td.as_dict() == [\n        {\n            \"Path\": \"param.json\",\n            \"Param\": \"a.b.d\",\n            \"HEAD\": \"-\",\n            \"workspace\": \"new\",\n            \"Change\": \"-\",\n        }\n    ]\n\n\ndef test_diff_old_deleted():\n    td = diff_table(\n        {\"metric.json\": {\"a.b.d\": {\"old\": \"old\", \"new\": None}}}, title=\"Metric\"\n    )\n    assert td.as_dict() == [\n        {\n            \"Path\": \"metric.json\",\n            \"Metric\": \"a.b.d\",\n            \"HEAD\": \"old\",\n            \"workspace\": \"-\",\n            \"Change\": \"-\",\n        }\n    ]\n\n\ndef 
test_diff_sorted():\n    td = diff_table(\n        {\n            \"metrics.yaml\": {\n                \"x.b\": {\"old\": 5, \"new\": 6, \"diff\": 1},\n                \"a.d.e\": {\"old\": 3, \"new\": 4, \"diff\": 1},\n                \"a.b.c\": {\"old\": 1, \"new\": 2, \"diff\": 1},\n            }\n        },\n        \"Metric\",\n    )\n    assert list(td) == [\n        [\"metrics.yaml\", \"a.b.c\", \"1\", \"2\", \"1\"],\n        [\"metrics.yaml\", \"a.d.e\", \"3\", \"4\", \"1\"],\n        [\"metrics.yaml\", \"x.b\", \"5\", \"6\", \"1\"],\n    ]\n\n\ndef test_diff_falsey_values():\n    diff = {\"metrics.yaml\": {\"x.b\": {\"old\": 0, \"new\": 0.0, \"diff\": 0.0}}}\n    td = diff_table(diff, \"Metric\")\n    assert td.as_dict() == [\n        {\n            \"Path\": \"metrics.yaml\",\n            \"Metric\": \"x.b\",\n            \"HEAD\": \"0\",\n            \"workspace\": \"0.0\",\n            \"Change\": \"0.0\",\n        }\n    ]\n\n\n@pytest.mark.parametrize(\n    \"composite, expected\",\n    [([2, 3], \"[2, 3]\"), ({\"foo\": 3, \"bar\": 3}, \"{'foo': 3, 'bar': 3}\")],\n)\ndef test_diff_list(composite, expected):\n    td = diff_table({\"params.yaml\": {\"a.b.c\": {\"old\": 1, \"new\": composite}}}, \"Param\")\n    assert td.as_dict() == [\n        {\n            \"Path\": \"params.yaml\",\n            \"Param\": \"a.b.c\",\n            \"HEAD\": \"1\",\n            \"workspace\": expected,\n            \"Change\": \"-\",\n        }\n    ]\n\n\n@pytest.mark.parametrize(\"markdown\", [True, False])\ndef test_diff_mocked(mocker, markdown):\n    ret = mocker.MagicMock()\n    m = mocker.patch(\"dvc.compare.diff_table\", return_value=ret)\n\n    show_diff({}, \"metrics\", markdown=markdown)\n\n    m.assert_called_once_with(\n        {},\n        title=\"metrics\",\n        old=True,\n        no_path=False,\n        precision=None,\n        on_empty_diff=None,\n        show_changes=True,\n        round_digits=False,\n        a_rev=None,\n        b_rev=None,\n    
)\n    ret.render.assert_called_once_with(markdown=markdown)\n\n\ndef test_diff_default(capsys):\n    show_diff(\n        {\n            \"metrics.yaml\": {\n                \"x.b\": {\"old\": 5, \"new\": 6},\n                \"a.d.e\": {\"old\": 3, \"new\": 4, \"diff\": 1},\n                \"a.b.c\": {\"old\": 1, \"new\": 2, \"diff\": 1},\n            }\n        },\n        \"Metric\",\n    )\n    out, _ = capsys.readouterr()\n\n    assert out == textwrap.dedent(\n        \"\"\"\\\n        Path          Metric    HEAD    workspace    Change\n        metrics.yaml  a.b.c     1       2            1\n        metrics.yaml  a.d.e     3       4            1\n        metrics.yaml  x.b       5       6            -\n        \"\"\"\n    )\n\n\ndef test_metrics_diff_md(capsys):\n    show_diff(\n        {\n            \"metrics.yaml\": {\n                \"x.b\": {\"old\": 5, \"new\": 6},\n                \"a.d.e\": {\"old\": 3, \"new\": 4, \"diff\": 1},\n                \"a.b.c\": {\"old\": 1, \"new\": 2, \"diff\": 1},\n            }\n        },\n        \"Metric\",\n        markdown=True,\n    )\n    out, _ = capsys.readouterr()\n\n    assert out == textwrap.dedent(\n        \"\"\"\\\n        | Path         | Metric   | HEAD   | workspace   | Change   |\n        |--------------|----------|--------|-------------|----------|\n        | metrics.yaml | a.b.c    | 1      | 2           | 1        |\n        | metrics.yaml | a.d.e    | 3      | 4           | 1        |\n        | metrics.yaml | x.b      | 5      | 6           | -        |\n\n        \"\"\"\n    )\n\n\ndef test_metrics_show_with_valid_falsey_values():\n    td = metrics_table(\n        {\n            \"branch_1\": {\n                \"data\": {\n                    \"metrics.json\": {\"data\": {\"a\": 0, \"b\": {\"ad\": 0.0, \"bc\": 0.0}}}\n                }\n            }\n        },\n        all_branches=True,\n    )\n    assert td.as_dict() == [\n        {\n            \"Revision\": \"branch_1\",\n            
\"Path\": \"metrics.json\",\n            \"a\": \"0\",\n            \"b.ad\": \"0.0\",\n            \"b.bc\": \"0.0\",\n        }\n    ]\n\n\ndef test_metrics_show_with_no_revision():\n    td = metrics_table(\n        {\n            \"branch_1\": {\n                \"data\": {\n                    \"metrics.json\": {\"data\": {\"a\": 0, \"b\": {\"ad\": 0.0, \"bc\": 0.0}}}\n                }\n            }\n        },\n        all_branches=False,\n    )\n    assert td.as_dict() == [\n        {\"Path\": \"metrics.json\", \"a\": \"0\", \"b.ad\": \"0.0\", \"b.bc\": \"0.0\"}\n    ]\n\n\ndef test_metrics_show_with_non_dict_values():\n    td = metrics_table(\n        {\"branch_1\": {\"data\": {\"metrics.json\": {\"data\": 1}}}},\n        all_branches=True,\n    )\n    assert td.as_dict() == [{\"Revision\": \"branch_1\", \"Path\": \"metrics.json\", \"\": \"1\"}]\n\n\ndef test_metrics_show_with_multiple_revision():\n    td = metrics_table(\n        {\n            \"branch_1\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"a\": 1, \"b\": {\"ad\": 1, \"bc\": 2}}}}\n            },\n            \"branch_2\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"a\": 1, \"b\": {\"ad\": 3, \"bc\": 4}}}}\n            },\n        },\n        all_branches=True,\n    )\n    assert td.as_dict() == [\n        {\n            \"Revision\": \"branch_1\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"1\",\n            \"b.ad\": \"1\",\n            \"b.bc\": \"2\",\n        },\n        {\n            \"Revision\": \"branch_2\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"1\",\n            \"b.ad\": \"3\",\n            \"b.bc\": \"4\",\n        },\n    ]\n\n\ndef test_metrics_show_with_one_revision_multiple_paths():\n    td = metrics_table(\n        {\n            \"branch_1\": {\n                \"data\": {\n                    \"metrics.json\": {\"data\": {\"a\": 1, \"b\": {\"ad\": 0.1, \"bc\": 1.03}}},\n                    
\"metrics_1.json\": {\"data\": {\"a\": 2.3, \"b\": {\"ad\": 6.5, \"bc\": 7.9}}},\n                }\n            }\n        },\n        all_branches=True,\n    )\n    assert td.as_dict() == [\n        {\n            \"Revision\": \"branch_1\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"1\",\n            \"b.ad\": \"0.1\",\n            \"b.bc\": \"1.03\",\n        },\n        {\n            \"Revision\": \"branch_1\",\n            \"Path\": \"metrics_1.json\",\n            \"a\": \"2.3\",\n            \"b.ad\": \"6.5\",\n            \"b.bc\": \"7.9\",\n        },\n    ]\n\n\ndef test_metrics_show_with_different_metrics_header():\n    td = metrics_table(\n        {\n            \"branch_1\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"b\": {\"ad\": 1, \"bc\": 2}, \"c\": 4}}}\n            },\n            \"branch_2\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"a\": 1, \"b\": {\"ad\": 3, \"bc\": 4}}}}\n            },\n        },\n        all_branches=True,\n    )\n    assert td.as_dict() == [\n        {\n            \"Revision\": \"branch_1\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"-\",\n            \"b.ad\": \"1\",\n            \"b.bc\": \"2\",\n            \"c\": \"4\",\n        },\n        {\n            \"Revision\": \"branch_2\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"1\",\n            \"b.ad\": \"3\",\n            \"b.bc\": \"4\",\n            \"c\": \"-\",\n        },\n    ]\n\n\ndef test_metrics_show_precision():\n    metrics = {\n        \"branch_1\": {\n            \"data\": {\n                \"metrics.json\": {\n                    \"data\": {\n                        \"a\": 1.098765366365355,\n                        \"b\": {\"ad\": 1.5342673, \"bc\": 2.987725527},\n                    }\n                }\n            }\n        }\n    }\n\n    td = metrics_table(metrics, all_branches=True, precision=4)\n    assert td.as_dict() == [\n        {\n        
    \"Revision\": \"branch_1\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"1.099\",\n            \"b.ad\": \"1.534\",\n            \"b.bc\": \"2.988\",\n        }\n    ]\n\n    td = metrics_table(metrics, all_branches=True, precision=4, round_digits=True)\n    assert td.as_dict() == [\n        {\n            \"Revision\": \"branch_1\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"1.0988\",\n            \"b.ad\": \"1.5343\",\n            \"b.bc\": \"2.9877\",\n        }\n    ]\n\n    td = metrics_table(metrics, all_branches=True, precision=7)\n    assert td.as_dict() == [\n        {\n            \"Revision\": \"branch_1\",\n            \"Path\": \"metrics.json\",\n            \"a\": \"1.098765\",\n            \"b.ad\": \"1.534267\",\n            \"b.bc\": \"2.987726\",\n        }\n    ]\n\n\n@pytest.mark.parametrize(\"markdown\", [True, False])\ndef test_metrics_show_mocked(mocker, markdown):\n    ret = mocker.MagicMock()\n    m = mocker.patch(\"dvc.compare.metrics_table\", return_value=ret)\n\n    show_metrics({}, markdown=markdown)\n\n    m.assert_called_once_with(\n        {},\n        all_branches=False,\n        all_tags=False,\n        all_commits=False,\n        precision=None,\n        round_digits=False,\n    )\n    ret.render.assert_called_once_with(markdown=markdown)\n\n\ndef test_metrics_show_default(capsys):\n    show_metrics(\n        metrics={\n            \"branch_1\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"b\": {\"ad\": 1, \"bc\": 2}, \"c\": 4}}},\n                \"error\": Exception(\"Failed just a little bit\"),\n            },\n            \"branch_2\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"a\": 1, \"b\": {\"ad\": 3, \"bc\": 4}}}}\n            },\n        },\n        all_branches=True,\n    )\n    out, _ = capsys.readouterr()\n    assert out == textwrap.dedent(\n        \"\"\"\\\n        Revision    Path          a    b.ad    b.bc    c\n        branch_1    
metrics.json  -    1       2       4\n        branch_2    metrics.json  1    3       4       -\n        \"\"\"\n    )\n\n\ndef test_metrics_show_markdown(capsys):\n    show_metrics(\n        metrics={\n            \"branch_1\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"b\": {\"ad\": 1, \"bc\": 2}, \"c\": 4}}}\n            },\n            \"branch_2\": {\n                \"data\": {\"metrics.json\": {\"data\": {\"a\": 1, \"b\": {\"ad\": 3, \"bc\": 4}}}}\n            },\n            \"branch_3\": {\"error\": YAMLFileCorruptedError(\"failed\")},\n        },\n        all_branches=True,\n        markdown=True,\n    )\n    out, _ = capsys.readouterr()\n    assert out == textwrap.dedent(\n        \"\"\"\\\n        | Revision   | Path         | a   | b.ad   | b.bc   | c   |\n        |------------|--------------|-----|--------|--------|-----|\n        | branch_1   | metrics.json | -   | 1      | 2      | 4   |\n        | branch_2   | metrics.json | 1   | 3      | 4      | -   |\n\n        \"\"\"\n    )\n"
  },
  {
    "path": "tests/unit/test_config.py",
    "content": "import logging\nimport os\nimport textwrap\n\nimport pytest\n\nfrom dvc.config import Config, ConfigError\nfrom dvc.fs import LocalFileSystem\n\n\n@pytest.mark.parametrize(\n    \"path, expected\",\n    [\n        (\"cache\", \"../cache\"),\n        (os.path.join(\"..\", \"cache\"), \"../../cache\"),\n        (os.getcwd(), os.getcwd()),\n        (\"ssh://some/path\", \"ssh://some/path\"),\n    ],\n)\ndef test_to_relpath(path, expected):\n    assert Config._to_relpath(os.path.join(\".\", \"config\"), path) == expected\n\n\n@pytest.mark.parametrize(\n    \"path, expected\",\n    [\n        (\"cache\", os.path.abspath(os.path.join(\"conf_dir\", \"cache\"))),\n        (\"dir/cache\", os.path.abspath(os.path.join(\"conf_dir\", \"dir\", \"cache\"))),\n        (\"../cache\", os.path.abspath(\"cache\")),\n        (os.getcwd(), os.getcwd()),\n        (\"ssh://some/path\", \"ssh://some/path\"),\n    ],\n)\ndef test_resolve(path, expected):\n    conf_dir = os.path.abspath(\"conf_dir\")\n    assert Config._resolve(conf_dir, path) == expected\n\n\ndef test_resolve_homedir():\n    # NOTE: our test suit patches $HOME, but that only works within the\n    # test itself, so we can't use expanduser in @parametrize here.\n    conf_dir = os.path.abspath(\"conf_dir\")\n    expected = os.path.expanduser(os.path.join(\"~\", \"cache\"))\n    assert Config._resolve(conf_dir, \"~/cache\") == expected\n\n\ndef test_get_fs(tmp_dir, scm):\n    tmp_dir.scm_gen(\"foo\", \"foo\", commit=\"add foo\")\n\n    fs = scm.get_fs(\"master\")\n    config = Config.from_cwd(fs=fs)\n\n    assert config.fs == fs\n    assert config.wfs != fs\n    assert isinstance(config.wfs, LocalFileSystem)\n\n    assert config._get_fs(\"repo\") == fs\n    assert config._get_fs(\"local\") == config.wfs\n    assert config._get_fs(\"global\") == config.wfs\n    assert config._get_fs(\"system\") == config.wfs\n\n\ndef test_s3_ssl_verify(tmp_dir, dvc):\n    config = Config.from_cwd(validate=False)\n    with 
config.edit() as conf:\n        conf[\"remote\"][\"remote-name\"] = {\"url\": \"s3://bucket/dvc\"}\n\n    assert \"ssl_verify\" not in config[\"remote\"][\"remote-name\"]\n\n    with config.edit() as conf:\n        section = conf[\"remote\"][\"remote-name\"]\n        section[\"ssl_verify\"] = False\n\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n        ['remote \"remote-name\"']\n            url = s3://bucket/dvc\n            ssl_verify = False\n        \"\"\"\n    )\n\n    with config.edit() as conf:\n        section = conf[\"remote\"][\"remote-name\"]\n        section[\"ssl_verify\"] = \"/path/to/custom/cabundle.pem\"\n\n    assert (tmp_dir / \".dvc\" / \"config\").read_text() == textwrap.dedent(\n        \"\"\"\\\n        [core]\n            no_scm = True\n        ['remote \"remote-name\"']\n            url = s3://bucket/dvc\n            ssl_verify = /path/to/custom/cabundle.pem\n        \"\"\"\n    )\n\n\ndef test_load_unicode_error(tmp_dir, dvc, mocker):\n    config = Config.from_cwd(validate=False)\n    mocker.patch(\n        \"configobj.ConfigObj\", side_effect=UnicodeDecodeError(\"\", b\"\", 0, 0, \"\")\n    )\n    with pytest.raises(ConfigError):\n        with config.edit():\n            pass\n\n\ndef test_load_configob_error(tmp_dir, dvc, mocker):\n    from configobj import ConfigObjError\n\n    config = Config.from_cwd(validate=False)\n    mocker.patch(\"configobj.ConfigObj\", side_effect=ConfigObjError())\n    with pytest.raises(ConfigError):\n        with config.edit():\n            pass\n\n\ndef test_feature_section_supports_arbitrary_values(caplog):\n    with caplog.at_level(logging.WARNING, logger=\"dvc.config_schema\"):\n        data = Config.validate(\n            {\n                \"feature\": {\n                    \"random_key_1\": \"random_value_1\",\n                    \"random_key_2\": 42,\n                }\n            }\n        )\n\n    
assert \"random_key_1\" not in data\n    assert \"random_key_2\" not in data\n    assert (\n        \"'feature.random_key_1', 'feature.random_key_2' config options are unsupported\"\n    ) in caplog.text\n"
  },
  {
    "path": "tests/unit/test_context.py",
    "content": "from dataclasses import asdict\nfrom math import pi\n\nimport pytest\n\nfrom dvc.fs import LocalFileSystem\nfrom dvc.parsing import DEFAULT_PARAMS_FILE\nfrom dvc.parsing.context import (\n    Context,\n    CtxDict,\n    CtxList,\n    KeyNotInContext,\n    MergeError,\n    ParamsLoadError,\n    Value,\n    recurse_not_a_node,\n)\nfrom dvc.utils import relpath\nfrom dvc.utils.serialize import dumps_yaml\n\n\ndef test_context():\n    context = Context({\"foo\": \"bar\"})\n    assert context[\"foo\"] == Value(\"bar\")\n\n    context = Context(foo=\"bar\")\n    assert context[\"foo\"] == Value(\"bar\")\n\n    context[\"foobar\"] = \"foobar\"\n    assert context[\"foobar\"] == Value(\"foobar\")\n\n    del context[\"foobar\"]\n    assert \"foobar\" not in context\n    assert \"foo\" in context\n\n    with pytest.raises(KeyError):\n        _ = context[\"foobar\"]\n\n\ndef test_context_dict_ignores_keys_except_str():\n    c = Context({\"one\": 1, 3: 3})\n    assert \"one\" in c\n    assert 3 not in c\n\n    c[3] = 3\n    assert 3 not in c\n\n\ndef test_context_list():\n    lst = [\"foo\", \"bar\", \"baz\"]\n    context = Context(lst=lst)\n\n    assert context[\"lst\"] == CtxList(lst)\n    assert context[\"lst\"][0] == Value(\"foo\")\n    del context[\"lst\"][-1]\n\n    assert \"baz\" not in context\n\n    with pytest.raises(IndexError):\n        _ = context[\"lst\"][3]\n\n    context[\"lst\"].insert(0, \"baz\")\n    assert context[\"lst\"] == CtxList([\"baz\", *lst[:2]])\n\n\ndef test_context_setitem_getitem():\n    context = Context()\n    lst = [1, 2, \"three\", True, pi, b\"bytes\", None]\n    context[\"list\"] = lst\n\n    assert isinstance(context[\"list\"], CtxList)\n    assert context[\"list\"] == CtxList(lst)\n    for i, val in enumerate(lst):\n        assert context[\"list\"][i] == Value(val)\n\n    d = {\n        \"foo\": \"foo\",\n        \"bar\": \"bar\",\n        \"list\": [\n            {\"foo0\": \"foo0\", \"bar0\": \"bar0\"},\n            
{\"foo1\": \"foo1\", \"bar1\": \"bar1\"},\n        ],\n    }\n    context[\"data\"] = d\n\n    assert isinstance(context[\"data\"], CtxDict)\n    assert context[\"data\"] == CtxDict(d)\n    assert context[\"data\"][\"foo\"] == Value(\"foo\")\n    assert context[\"data\"][\"bar\"] == Value(\"bar\")\n\n    assert isinstance(context[\"data\"][\"list\"], CtxList)\n    assert context[\"data\"][\"list\"] == CtxList(d[\"list\"])\n\n    for i, val in enumerate(d[\"list\"]):\n        c = context[\"data\"][\"list\"][i]\n        assert isinstance(c, CtxDict)\n        assert c == CtxDict(val)\n        assert c[f\"foo{i}\"] == Value(f\"foo{i}\")\n        assert c[f\"bar{i}\"] == Value(f\"bar{i}\")\n\n    with pytest.raises(TypeError):\n        context[\"set\"] = {1, 2, 3}\n\n\ndef test_loop_context():\n    context = Context({\"foo\": \"foo\", \"bar\": \"bar\", \"lst\": [1, 2, 3]})\n\n    assert list(context) == [\"foo\", \"bar\", \"lst\"]\n    assert len(context) == 3\n\n    assert list(context[\"lst\"]) == [Value(i) for i in [1, 2, 3]]\n    assert len(context[\"lst\"]) == 3\n\n    assert list(context.items()) == [\n        (\"foo\", Value(\"foo\")),\n        (\"bar\", Value(\"bar\")),\n        (\"lst\", CtxList([1, 2, 3])),\n    ]\n\n\ndef test_repr():\n    data = {\"foo\": \"foo\", \"bar\": \"bar\", \"lst\": [1, 2, 3]}\n    context = Context(data)\n\n    assert repr(context) == repr(data)\n    assert str(context) == str(data)\n\n\ndef test_select():\n    context = Context(foo=\"foo\", bar=\"bar\", lst=[1, 2, 3])\n\n    assert context.select(\"foo\") == Value(\"foo\")\n    assert context.select(\"bar\") == Value(\"bar\")\n    assert context.select(\"lst\") == CtxList([1, 2, 3])\n    assert context.select(\"lst.0\") == Value(1)\n\n    with pytest.raises(KeyNotInContext):\n        context.select(\"baz\")\n\n    d = {\n        \"lst\": [\n            {\"foo0\": \"foo0\", \"bar0\": \"bar0\"},\n            {\"foo1\": \"foo1\", \"bar1\": \"bar1\"},\n        ]\n    }\n    context = 
Context(d)\n    assert context.select(\"lst\") == CtxList(d[\"lst\"])\n    assert context.select(\"lst.0\") == CtxDict(d[\"lst\"][0])\n    assert context.select(\"lst.1\") == CtxDict(d[\"lst\"][1])\n\n    with pytest.raises(KeyNotInContext):\n        context.select(\"lst.2\")\n\n    for i, _ in enumerate(d[\"lst\"]):\n        assert context.select(f\"lst.{i}.foo{i}\") == Value(f\"foo{i}\")\n        assert context.select(f\"lst.{i}.bar{i}\") == Value(f\"bar{i}\")\n\n\ndef test_select_unwrap():\n    context = Context({\"dct\": {\"foo\": \"bar\"}}, lst=[1, 2, 3], foo=\"foo\")\n\n    assert context.select(\"dct.foo\", unwrap=True) == \"bar\"\n    assert context.select(\"lst.0\", unwrap=True) == 1\n    assert context.select(\"foo\", unwrap=True) == \"foo\"\n\n    node = context.select(\"dct\", unwrap=True)\n    assert isinstance(node, dict)\n    assert recurse_not_a_node(node)\n    assert node == {\"foo\": \"bar\"}\n\n    node = context.select(\"lst\", unwrap=True)\n    assert isinstance(node, list)\n    assert recurse_not_a_node(node)\n    assert node == [1, 2, 3]\n\n\ndef test_merge_dict():\n    d1 = {\"Train\": {\"us\": {\"lr\": 10}}}\n    d2 = {\"Train\": {\"us\": {\"layers\": 100}}}\n\n    c1 = Context(d1)\n    c2 = Context(d2)\n\n    c1.merge_update(c2)\n    assert c1.select(\"Train.us\") == CtxDict(lr=10, layers=100)\n\n    with pytest.raises(MergeError):\n        # cannot overwrite by default\n        c1.merge_update({\"Train\": {\"us\": {\"lr\": 15}}})\n\n    c1.merge_update({\"Train\": {\"us\": {\"lr\": 15}}}, overwrite=True)\n    node = c1.select(\"Train.us\")\n    assert node == {\"lr\": 15, \"layers\": 100}\n    assert isinstance(node, CtxDict)\n    assert node[\"lr\"] == Value(15)\n    assert node[\"layers\"] == Value(100)\n\n\ndef test_merge_list():\n    c1 = Context(lst=[1, 2, 3])\n    with pytest.raises(MergeError):\n        # cannot overwrite by default\n        c1.merge_update({\"lst\": [10, 11, 12]})\n\n    # lists are never merged\n    
c1.merge_update({\"lst\": [10, 11, 12]}, overwrite=True)\n    node = c1.select(\"lst\")\n    assert node == [10, 11, 12]\n    assert isinstance(node, CtxList)\n    assert node[0] == Value(10)\n\n\ndef test_overwrite_with_setitem():\n    context = Context(foo=\"foo\", d={\"bar\": \"bar\", \"baz\": \"baz\"})\n    context[\"d\"] = \"overwrite\"\n    assert \"d\" in context\n    assert context[\"d\"] == Value(\"overwrite\")\n\n\ndef test_load_from(mocker):\n    d = {\"x\": {\"y\": {\"z\": 5}, \"lst\": [1, 2, 3]}, \"foo\": \"foo\"}\n    fs = mocker.Mock(\n        open=mocker.mock_open(read_data=dumps_yaml(d)),\n        **{\"exists.return_value\": True, \"isdir.return_value\": False},\n    )\n    file = \"params.yaml\"\n    c = Context.load_from(fs, file)\n\n    assert asdict(c[\"x\"].meta) == {\n        \"source\": file,\n        \"dpaths\": [\"x\"],\n        \"local\": False,\n    }\n    assert asdict(c[\"foo\"].meta) == {\n        \"source\": file,\n        \"local\": False,\n        \"dpaths\": [\"foo\"],\n    }\n    assert asdict(c[\"x\"][\"y\"].meta) == {\n        \"source\": file,\n        \"dpaths\": [\"x\", \"y\"],\n        \"local\": False,\n    }\n    assert asdict(c[\"x\"][\"y\"][\"z\"].meta) == {\n        \"source\": file,\n        \"dpaths\": [\"x\", \"y\", \"z\"],\n        \"local\": False,\n    }\n    assert asdict(c[\"x\"][\"lst\"].meta) == {\n        \"source\": file,\n        \"dpaths\": [\"x\", \"lst\"],\n        \"local\": False,\n    }\n    assert asdict(c[\"x\"][\"lst\"][0].meta) == {\n        \"source\": file,\n        \"dpaths\": [\"x\", \"lst\", \"0\"],\n        \"local\": False,\n    }\n\n\ndef test_clone():\n    d = {\n        \"dct\": {\n            \"foo0\": \"foo0\",\n            \"bar0\": \"bar0\",\n            \"foo1\": \"foo1\",\n            \"bar1\": \"bar1\",\n        },\n        \"lst\": [1, 2, 3],\n    }\n    c1 = Context(d)\n    c2 = Context.clone(c1)\n\n    c2[\"dct\"][\"foo0\"] = \"foo\"\n    del c2[\"dct\"][\"foo1\"]\n\n    
assert c1 != c2\n    assert c1 == Context(d)\n    assert c2.select(\"lst.0\") == Value(1)\n    with pytest.raises(KeyNotInContext):\n        c2.select(\"lst.1.not_existing_key\")\n\n\ndef test_track(tmp_dir):\n    d = {\n        \"lst\": [\n            {\"foo0\": \"foo0\", \"bar0\": \"bar0\"},\n            {\"foo1\": \"foo1\", \"bar1\": \"bar1\"},\n        ],\n        \"dct\": {\"foo\": \"foo\", \"bar\": \"bar\", \"baz\": \"baz\"},\n    }\n    fs = LocalFileSystem()\n    (tmp_dir / \"params.yaml\").dump(d, fs=fs)\n\n    context = Context.load_from(fs, \"params.yaml\")\n\n    def key_tracked(d, key):\n        assert len(d) == 1\n        return key in d[\"params.yaml\"]\n\n    with context.track() as tracked:\n        context.select(\"lst\")\n        assert key_tracked(tracked, \"lst\")\n\n        context.select(\"dct\")\n        assert not key_tracked(tracked, \"dct\")\n\n        context.select(\"dct.foo\")\n        assert key_tracked(tracked, \"dct.foo\")\n\n        # Currently, it's unable to track dictionaries, as it can be merged\n        # from multiple sources.\n        context.select(\"lst.0\")\n        assert not key_tracked(tracked, \"lst.0\")\n\n        # FIXME: either support tracking list values in ParamsDependency\n        # or, prevent this from being tracked.\n        context.select(\"lst.0.foo0\")\n        assert key_tracked(tracked, \"lst.0.foo0\")\n\n\ndef test_track_from_multiple_files(tmp_dir):\n    d1 = {\"Train\": {\"us\": {\"lr\": 10}}}\n    d2 = {\"Train\": {\"us\": {\"layers\": 100}}}\n\n    fs = LocalFileSystem()\n    path1 = \"params.yaml\"\n    path2 = \"params2.yaml\"\n    (tmp_dir / path1).dump(d1, fs=fs)\n    (tmp_dir / path2).dump(d2, fs=fs)\n\n    context = Context.load_from(fs, path1)\n    c = Context.load_from(fs, path2)\n    context.merge_update(c)\n\n    def key_tracked(d, path, key):\n        return key in d[relpath(path)]\n\n    with context.track() as tracked:\n        context.select(\"Train\")\n        assert not 
key_tracked(tracked, path1, \"Train\")\n        assert not key_tracked(tracked, path2, \"Train\")\n\n        context.select(\"Train.us\")\n        assert not key_tracked(tracked, path1, \"Train.us\")\n        assert not key_tracked(tracked, path2, \"Train.us\")\n\n        context.select(\"Train.us.lr\")\n        assert key_tracked(tracked, path1, \"Train.us.lr\")\n        assert not key_tracked(tracked, path2, \"Train.us.lr\")\n        context.select(\"Train.us.layers\")\n        assert not key_tracked(tracked, path1, \"Train.us.layers\")\n        assert key_tracked(tracked, path2, \"Train.us.layers\")\n\n    context = Context.clone(context)\n    assert not context._tracked_data\n\n    # let's see with an alias\n    context[\"us\"] = context[\"Train\"][\"us\"]\n    with context.track() as tracked:\n        context.select(\"us\")\n        assert not key_tracked(tracked, path1, \"Train.us\")\n        assert not key_tracked(tracked, path2, \"Train.us\")\n\n        context.select(\"us.lr\")\n        assert key_tracked(tracked, path1, \"Train.us.lr\")\n        assert not key_tracked(tracked, path2, \"Train.us.lr\")\n        context.select(\"Train.us.layers\")\n        assert not key_tracked(tracked, path1, \"Train.us.layers\")\n        assert key_tracked(tracked, path2, \"Train.us.layers\")\n\n\ndef test_node_value():\n    d = {\"dct\": {\"foo\": \"bar\"}, \"lst\": [1, 2, 3], \"foo\": \"foo\"}\n    context = Context(d)\n    assert isinstance(context, (Context, CtxDict))\n    assert isinstance(context[\"dct\"], CtxDict)\n    assert isinstance(context[\"lst\"], CtxList)\n    assert isinstance(context[\"foo\"], Value)\n    assert isinstance(context[\"dct\"][\"foo\"], Value)\n    assert isinstance(context[\"lst\"][0], Value)\n\n    assert context.value == d\n    assert recurse_not_a_node(context.value)\n    assert isinstance(context.value[\"dct\"], dict)\n    assert isinstance(context.value[\"lst\"], list)\n    assert isinstance(context.value[\"foo\"], str)\n    assert 
isinstance(context.value[\"dct\"][\"foo\"], str)\n    assert isinstance(context.value[\"lst\"][0], int)\n\n    assert isinstance(context[\"dct\"].value, dict)\n    assert context[\"dct\"][\"foo\"].value == \"bar\"\n\n    assert isinstance(context[\"lst\"].value, list)\n    assert context[\"lst\"][1].value == 2\n\n    assert context[\"foo\"].value == \"foo\"\n\n\ndef test_resolve_resolves_dict_keys():\n    d = {\"dct\": {\"foo\": \"foobar\", \"persist\": True}}\n\n    context = Context(d)\n    assert context.resolve({\"${dct.foo}\": {\"persist\": \"${dct.persist}\"}}) == {\n        \"foobar\": {\"persist\": True}\n    }\n\n\ndef test_resolve_resolves_boolean_value():\n    d = {\"enabled\": True, \"disabled\": False}\n    context = Context(d)\n\n    assert context.resolve_str(\"${enabled}\") is True\n    assert context.resolve_str(\"${disabled}\") is False\n\n    assert context.resolve_str(\"--flag ${enabled}\") == \"--flag true\"\n    assert context.resolve_str(\"--flag ${disabled}\") == \"--flag false\"\n\n\ndef test_load_from_raises_if_file_not_exist(tmp_dir, dvc):\n    with pytest.raises(ParamsLoadError) as exc_info:\n        Context.load_from(dvc.fs, DEFAULT_PARAMS_FILE)\n\n    assert str(exc_info.value) == \"'params.yaml' does not exist\"\n\n\ndef test_load_from_raises_if_file_is_directory(tmp_dir, dvc):\n    (tmp_dir / \"data\").mkdir()\n\n    with pytest.raises(ParamsLoadError) as exc_info:\n        Context.load_from(dvc.fs, \"data\")\n\n    assert str(exc_info.value) == \"'data' is a directory\"\n"
  },
  {
    "path": "tests/unit/test_daemon.py",
    "content": "import inspect\nimport os\n\nfrom dvc import daemon\n\n\ndef test_daemon(mocker):\n    mock = mocker.patch(\"dvc.daemon._spawn\")\n    daemon.daemon([\"updater\"])\n\n    mock.assert_called()\n    args = mock.call_args[0]\n    env = args[2]\n    assert \"PYTHONPATH\" in env\n\n    file_path = os.path.abspath(inspect.stack()[0][1])\n    file_dir = os.path.dirname(file_path)\n    test_dir = os.path.dirname(file_dir)\n    dvc_dir = os.path.dirname(test_dir)\n    assert env[\"PYTHONPATH\"] == dvc_dir\n    assert env[daemon.DVC_DAEMON] == \"1\"\n\n\ndef test_no_recursive_spawn(mocker):\n    mocker.patch.dict(os.environ, {daemon.DVC_DAEMON: \"1\"})\n    mock_spawn = mocker.patch(\"dvc.daemon._spawn\")\n    daemon.daemon([\"updater\"])\n    mock_spawn.assert_not_called()\n"
  },
  {
    "path": "tests/unit/test_dirs.py",
    "content": "import sys\n\nimport pytest\n\nfrom dvc.dirs import global_config_dir, site_cache_dir\nfrom dvc.env import DVC_GLOBAL_CONFIG_DIR, DVC_SITE_CACHE_DIR\n\n\ndef test_global_config_dir_respects_env_var(monkeypatch):\n    path = \"/some/random/path\"\n    monkeypatch.setenv(DVC_GLOBAL_CONFIG_DIR, path)\n    assert global_config_dir() == path\n\n\n@pytest.mark.skipif(sys.platform != \"linux\", reason=\"Only for Unix platforms\")\ndef test_site_cache_dir_on_unix(monkeypatch):\n    monkeypatch.delenv(DVC_SITE_CACHE_DIR, raising=False)\n    assert site_cache_dir() == \"/var/tmp/dvc\"\n\n\ndef test_site_cache_dir_env_var(monkeypatch):\n    monkeypatch.setenv(DVC_SITE_CACHE_DIR, \"foo_bar\")\n    assert site_cache_dir() == \"foo_bar\"\n\n\ndef test_site_cache_dir_with_config_parameter(monkeypatch):\n    monkeypatch.delenv(DVC_SITE_CACHE_DIR)\n    assert site_cache_dir(config_site_cache_dir=\"foo_bar\") == \"foo_bar\"\n\n\ndef test_site_cache_dir_env_var_precedence(monkeypatch):\n    monkeypatch.setenv(DVC_SITE_CACHE_DIR, \"foo\")\n    assert site_cache_dir(config_site_cache_dir=\"bar\") == \"foo\"\n"
  },
  {
    "path": "tests/unit/test_dvcfile.py",
    "content": "import pytest\n\nfrom dvc.dvcfile import (\n    LOCK_FILE,\n    PROJECT_FILE,\n    FileIsGitIgnored,\n    ProjectFile,\n    SingleStageFile,\n    load_file,\n)\nfrom dvc.stage import PipelineStage\nfrom dvc.stage.exceptions import StageFileDoesNotExistError, StageFileIsNotDvcFileError\nfrom dvc.utils.fs import remove\nfrom dvc.utils.serialize import EncodingError\nfrom dvc.utils.strictyaml import YAMLValidationError\n\n\n@pytest.mark.parametrize(\n    \"path\",\n    [\n        \"pipelines.yaml\",\n        \"pipelines.yml\",\n        \"custom-pipelines.yml\",\n        \"custom-pipelines.yaml\",\n        \"../models/pipelines.yml\",\n    ],\n)\ndef test_pipelines_file(path):\n    file_obj = load_file(object(), path)\n    assert isinstance(file_obj, ProjectFile)\n\n\n@pytest.mark.parametrize(\"path\", [\"Dvcfile\", \"stage.dvc\", \"../models/stage.dvc\"])\ndef test_pipelines_single_stage_file(path):\n    file_obj = load_file(object(), path)\n    assert isinstance(file_obj, SingleStageFile)\n\n\n@pytest.mark.parametrize(\"file\", [\"stage.dvc\", \"dvc.yaml\"])\n@pytest.mark.parametrize(\"is_dvcignored\", [True, False])\ndef test_stage_load_on_not_existing_file(tmp_dir, dvc, file, is_dvcignored):\n    dvcfile = load_file(dvc, file)\n    if is_dvcignored:\n        (tmp_dir / \".dvcignore\").write_text(file)\n\n    assert not dvcfile.exists()\n    with pytest.raises(StageFileDoesNotExistError) as exc_info:\n        assert dvcfile.stages.values()\n\n    assert str(exc_info.value) == f\"'{file}' does not exist\"\n\n\n@pytest.mark.parametrize(\"file\", [\"stage.dvc\", \"dvc.yaml\"])\ndef test_stage_load_on_non_file(tmp_dir, dvc, file):\n    (tmp_dir / file).mkdir()\n    dvcfile = load_file(dvc, file)\n    with pytest.raises(StageFileIsNotDvcFileError):\n        assert dvcfile.stages.values()\n\n\n@pytest.mark.parametrize(\"file\", [\"stage.dvc\", \"dvc.yaml\"])\ndef test_stage_load_on_invalid_data(tmp_dir, dvc, file):\n    data = {\"is_this_a_valid_dvcfile\": 
False}\n    (tmp_dir / file).dump(data)\n    dvcfile = load_file(dvc, file)\n    with pytest.raises(YAMLValidationError):\n        assert dvcfile.stages\n    with pytest.raises(YAMLValidationError):\n        assert dvcfile.validate(data, file)\n\n\ndef test_dump_stage(tmp_dir, dvc):\n    stage = PipelineStage(dvc, cmd=\"command\", name=\"stage_name\", path=\"dvc.yaml\")\n    dvcfile = load_file(dvc, \"dvc.yaml\")\n\n    dvcfile.dump(stage, update_lock=False, update_pipeline=False)\n    assert not (tmp_dir / PROJECT_FILE).exists()\n    assert not (tmp_dir / LOCK_FILE).exists()\n\n    dvcfile.dump(stage, update_pipeline=False)\n    assert not (tmp_dir / PROJECT_FILE).exists()\n    assert (tmp_dir / LOCK_FILE).exists()\n    assert dvcfile._lockfile.load()\n\n    remove(tmp_dir / LOCK_FILE)\n\n    dvcfile.dump(stage)\n    assert (tmp_dir / PROJECT_FILE).exists()\n    assert (tmp_dir / LOCK_FILE).exists()\n    assert list(dvcfile.stages.values()) == [stage]\n\n\ndef test_dump_multiple_pipeline_stages(tmp_dir, dvc):\n    stage1 = PipelineStage(dvc, cmd=\"cmd1\", name=\"stage1\", path=\"dvc.yaml\")\n    stage2 = PipelineStage(dvc, cmd=\"cmd2\", name=\"stage2\", path=\"dvc.yaml\")\n    dvcfile = load_file(dvc, \"dvc.yaml\")\n\n    dvcfile.dump_stages([stage1, stage2], update_lock=False, update_pipeline=False)\n    assert not (tmp_dir / LOCK_FILE).exists()\n    assert not (tmp_dir / PROJECT_FILE).exists()\n\n    dvcfile.dump_stages([stage1, stage2], update_pipeline=False)\n    assert not (tmp_dir / PROJECT_FILE).exists()\n    assert (tmp_dir / LOCK_FILE).parse() == {\n        \"schema\": \"2.0\",\n        \"stages\": {\"stage1\": {\"cmd\": \"cmd1\"}, \"stage2\": {\"cmd\": \"cmd2\"}},\n    }\n\n    dvcfile.dump_stages([stage1, stage2], update_lock=False)\n    assert (tmp_dir / PROJECT_FILE).parse() == {\n        \"stages\": {\"stage1\": {\"cmd\": \"cmd1\"}, \"stage2\": {\"cmd\": \"cmd2\"}}\n    }\n\n\ndef test_dump_stages_single_stage(tmp_dir, dvc):\n    stage = 
dvc.stage.create(\n        fname=\"foo.dvc\", outs=[\"out\"], deps=[\"dep\"], single_stage=True\n    )\n    stage.dvcfile.dump_stages([stage])\n    assert (tmp_dir / \"foo.dvc\").parse() == {\n        \"deps\": [{\"hash\": \"md5\", \"path\": \"dep\"}],\n        \"outs\": [{\"hash\": \"md5\", \"path\": \"out\"}],\n    }\n\n\n@pytest.mark.parametrize(\"file\", [\"stage.dvc\", \"dvc.yaml\"])\ndef test_stage_load_file_exists_but_dvcignored(tmp_dir, dvc, scm, file):\n    (tmp_dir / file).write_text(\"\")\n    (tmp_dir / \".dvcignore\").write_text(file)\n\n    dvc._reset()\n    dvcfile = load_file(dvc, file)\n    with pytest.raises(StageFileDoesNotExistError) as exc_info:\n        assert dvcfile.stages.values()\n\n    assert str(exc_info.value) == f\"'{file}' is dvc-ignored\"\n\n\n@pytest.mark.parametrize(\"file\", [\"foo.dvc\", \"dvc.yaml\"])\ndef test_try_loading_dvcfile_that_is_gitignored(tmp_dir, dvc, scm, file):\n    with open(tmp_dir / \".gitignore\", \"a+\", encoding=\"utf-8\") as fd:\n        fd.write(file)\n\n    # create a file just to avoid other checks\n    (tmp_dir / file).write_text(\"\")\n    scm._reset()\n\n    dvcfile = load_file(dvc, file)\n    with pytest.raises(FileIsGitIgnored) as exc_info:\n        dvcfile._load()\n\n    assert str(exc_info.value) == f\"bad DVC file name '{file}' is git-ignored.\"\n\n\ndef test_dvcfile_encoding_error(tmp_dir, dvc):\n    tmp_dir.gen(PROJECT_FILE, b\"\\x80some: stuff\")\n\n    dvcfile = load_file(dvc, PROJECT_FILE)\n    with pytest.raises(EncodingError):\n        dvcfile._load()\n"
  },
  {
    "path": "tests/unit/test_hashinfo.py",
    "content": "from dvc_data.hashfile.hash_info import HashInfo\n\n\ndef test_as_raw():\n    hash_info = HashInfo(\"md5\", \"a1d0c6e83f027327d8461063f4ac58a6.dir\", \"objname\")\n\n    raw = hash_info.as_raw()\n\n    assert hash_info.name == \"md5\"\n    assert hash_info.value == \"a1d0c6e83f027327d8461063f4ac58a6.dir\"\n    assert hash_info.obj_name == \"objname\"\n\n    assert raw.name == \"md5\"\n    assert raw.value == \"a1d0c6e83f027327d8461063f4ac58a6\"\n    assert raw.obj_name == \"objname\"\n"
  },
  {
    "path": "tests/unit/test_ignore.py",
    "content": "import os\nfrom os.path import join\n\nimport pytest\n\nfrom dvc.ignore import DvcIgnorePatterns\n\n\n@pytest.mark.parametrize(\n    \"file_to_ignore_relpath, patterns,  expected_match\",\n    [\n        # all rules from https://git-scm.com/docs/gitignore\n        (\"to_ignore\", [\"to_ignore\"], True),\n        (\"dont_ignore.txt\", [\"dont_ignore\"], False),\n        # A blank line matches no files, so it can serve as a separator for\n        # readability.\n        (\"to_ignore\", [\"\", \"to_ignore\"], True),\n        # A line starting with # serves as a comment.\n        # Put a backslash (\"\\\") in front of the first hash for patterns\n        # that begin with a hash.\n        (\"#to_ignore\", [\"\\\\#to_ignore\"], True),\n        (\"#to_ignore\", [\"#to_ignore\"], False),\n        # Trailing spaces are ignored unless they are quoted with\n        # backslash (\"\\\").\n        (\" to_ignore\", [\" to_ignore\"], False),\n        (\" to_ignore\", [\"\\\\ to_ignore\"], True),\n        # An optional prefix \"!\" which negates the pattern; any matching file\n        # excluded by a previous pattern will become included again.\n        (\"to_ignore.txt\", [\"to_ignore*\"], True),\n        (\"to_ignore.txt\", [\"to_ignore*\", \"!to_ignore.txt\"], False),\n        (\"to_ignore.txt\", [\"!to_ignore.txt\", \"to_ignore*\"], True),\n        # It is not possible to re-include a file if a parent directory of\n        # that file is excluded.\n        # Git doesn't list excluded directories for performance reasons,\n        # so any patterns on contained files have no effect,\n        # no matter where they are defined.\n        # see (`tests/func/test_ignore.py::test_ignore_parent_path`)\n        # Put a backslash (\"\\\") in front of the first \"!\"\n        # for patterns that begin with a literal \"!\",\n        # for example, \"\\!important!.txt\".\n        (\"!to_ignore.txt\", [\"\\\\!to_ignore.txt\"], True),\n        # The slash / is used as the 
directory separator.\n        # Separators may occur at the beginning, middle or end of the\n        # .gitignore search pattern.\n        # If there is a separator at the beginning or middle (or both)\n        # of the pattern, then the pattern is relative to the directory\n        # level of the particular .gitignore file itself.\n        # Otherwise the pattern may also match at any level below\n        # the .gitignore level.\n        (\"file\", [\"/file\"], True),\n        (os.path.join(\"data\", \"file\"), [\"/file\"], False),\n        (os.path.join(\"data\", \"file\"), [\"data/file\"], True),\n        (os.path.join(\"other\", \"data\", \"file\"), [\"data/file\"], False),\n        (\n            os.path.join(\n                os.path.sep,\n                \"full\",\n                \"path\",\n                \"to\",\n                \"ignore\",\n                \"file\",\n                \"to_ignore\",\n            ),\n            [\"to_ignore\"],\n            True,\n        ),\n        # If there is a separator at the end of the pattern then the pattern\n        # will only match directories,\n        # otherwise the pattern can match both files and directories.\n        # For example, a pattern doc/frotz/ matches doc/frotz directory,\n        # but not a/doc/frotz directory;\n        # see (`tests/func/test_ignore.py::test_ignore_sub_directory`)\n        # however frotz/ matches frotz and a/frotz that is a directory\n        # (all paths are relative from the .gitignore file).\n        # see (`tests/func/test_ignore.py::test_ignore_directory`)\n        # An asterisk \"*\" matches anything except a slash.\n        (\"to_ignore.txt\", [\"/*.txt\"], True),\n        (os.path.join(\"path\", \"to_ignore.txt\"), [\"/*.txt\"], False),\n        (os.path.join(\"data\", \"file.txt\"), [\"data/*\"], True),\n        (os.path.join(\"data\", \"subdir\", \"file.txt\"), [\"data/*\"], True),\n        (os.path.join(\"data\", \"file.txt\"), [\"data/\"], True),\n        
(os.path.join(\"data\", \"subdir\", \"file.txt\"), [\"data/\"], True),\n        (os.path.join(\"data\", \"subdir\", \"file.txt\"), [\"subdir/\"], True),\n        (os.path.join(\"data\", \"subdir\", \"file.txt\"), [\"/subdir/\"], False),\n        (os.path.join(\"data\", \"path\"), [\"path/\"], False),\n        (os.path.join(\".git\", \"file.txt\"), [\".git/\"], True),\n        (os.path.join(\"data\", \".dvc\", \"file.txt\"), [\".dvc/\"], True),\n        # wait for Git\n        # (os.path.join(\"data\", \"sub\", \"file.txt\"), [\"data/*\"], True),\n        (\n            os.path.join(\"rel\", \"path\", \"path2\", \"to_ignore\"),\n            [\"rel/*/to_ignore\"],\n            False,\n        ),\n        (\"file.txt\", [\"file.*\"], True),\n        # The character \"?\" matches any one character except \"/\".\n        (\"file.txt\", [\"fi?e.t?t\"], True),\n        (\"fi/e.txt\", [\"fi?e.t?t\"], False),\n        # The range notation, e.g. [a-zA-Z], can be used\n        # to match one of the characters in a range. 
See fnmatch(3) and\n        # the FNM_PATHNAME flag for a more detailed description.\n        (\"file.txt\", [\"[a-zA-Z]ile.txt\"], True),\n        (\"2ile.txt\", [\"[a-zA-Z]ile.txt\"], False),\n        # Two consecutive asterisks (\"**\") in patterns matched against\n        # full pathname may have special meaning:\n        # A leading \"**\" followed by a slash means match in all directories.\n        # For example, \"**/foo\" matches file or directory \"foo\" anywhere, the\n        # same as pattern \"foo\".\n        # \"**/foo/bar\" matches file or directory \"bar\" anywhere that is\n        # directly under directory \"foo\".\n        (os.path.join(\"rel\", \"p\", \"p2\", \"to_ignore\"), [\"**/to_ignore\"], True),\n        (\n            os.path.join(\"rel\", \"p\", \"p2\", \"to_ignore\"),\n            [\"**/p2/to_ignore\"],\n            True,\n        ),\n        (\n            os.path.join(\"rel\", \"path\", \"path2\", \"dont_ignore\"),\n            [\"**/to_ignore\"],\n            False,\n        ),\n        # A trailing \"/**\" matches everything inside.\n        # For example, \"abc/**\" matches all files inside directory \"abc\",\n        # relative to the location of the .gitignore file, with infinite depth.\n        (os.path.join(\"rel\", \"p\", \"p2\", \"to_ignore\"), [\"rel/**\"], True),\n        (os.path.join(\"rel\", \"p\", \"p2\", \"to_ignore\"), [\"p/**\"], False),\n        (\n            os.path.join(\"rel\", \"path\", \"path2\", \"dont_ignore\"),\n            [\"rel/**\"],\n            True,\n        ),\n        # A slash followed by two consecutive asterisks then a slash matches\n        # zero or more directories.\n        # For example, \"a/**/b\" matches \"a/b\", \"a/x/b\", \"a/x/y/b\" and so on.\n        (os.path.join(\"rel\", \"p\", \"to_ignore\"), [\"rel/**/to_ignore\"], True),\n        (\n            os.path.join(\"rel\", \"p\", \"p2\", \"to_ignore\"),\n            [\"rel/**/to_ignore\"],\n            True,\n        ),\n        (\n     
       os.path.join(\"rel\", \"path\", \"path2\", \"dont_ignore\"),\n            [\"rel/**/to_ignore\"],\n            False,\n        ),\n        (\n            os.path.join(\"rel\", \"path\", \"path2\", \"dont_ignore\"),\n            [\"path/**/dont_ignore\"],\n            False,\n        ),\n        # Other consecutive asterisks are considered regular asterisks\n        # and will match according to the previous rules.\n        (\"to_ignore.txt\", [\"/***.txt\"], True),\n        (os.path.join(\"path\", \"to_ignore.txt\"), [\"/****.txt\"], False),\n        (os.path.join(\"path\", \"to_ignore.txt\"), [\"****.txt\"], True),\n        (os.path.join(\"data\", \"file.txt\"), [\"data/***\"], True),\n        # bug from PathSpec\n        # (os.path.join(\"data\", \"p\", \"file.txt\"), [\"data/***\"], False),\n        (os.path.join(\"data\", \"p\", \"file.txt\"), [\"***/file.txt\"], False),\n        (\n            os.path.join(\"rel\", \"path\", \"path2\", \"to_ignore\"),\n            [\"rel/***/to_ignore\"],\n            False,\n        ),\n    ],\n)\ndef test_match_ignore_from_file(\n    file_to_ignore_relpath, patterns, expected_match, mocker\n):\n    from dvc.fs import localfs\n\n    root = r\"\\\\\" if os.name == \"nt\" else \"/\"\n    dvcignore_path = os.path.join(\n        root, \"full\", \"path\", \"to\", \"ignore\", \"file\", \".dvcignore\"\n    )\n    dvcignore_dirname = os.path.dirname(dvcignore_path)\n\n    mocker.patch.object(\n        localfs, \"open\", mocker.mock_open(read_data=\"\\n\".join(patterns))\n    )\n    ignore_file = DvcIgnorePatterns.from_file(dvcignore_path, localfs, \"mocked\")\n\n    assert (\n        ignore_file.matches(dvcignore_dirname, file_to_ignore_relpath) == expected_match\n    )\n\n\n@pytest.mark.parametrize(\"sub_dir\", [\"\", \"dir\"])\n@pytest.mark.parametrize(\"omit_dir\", [\".git\", \".hg\", \".dvc\"])\ndef test_should_ignore_dir(omit_dir, sub_dir):\n    root = os.path.join(os.path.sep, \"walk\", \"dir\", \"root\")\n    ignore = 
DvcIgnorePatterns([\".git/\", \".hg/\", \".dvc/\"], root, os.sep)\n\n    dirs = [omit_dir, \"dir1\", \"dir2\"]\n    files = [omit_dir, \"file1\", \"file2\"]\n\n    if sub_dir:\n        current = os.path.join(root, sub_dir)\n    else:\n        current = root\n\n    new_dirs, new_files = ignore(current, dirs, files)\n\n    assert set(new_dirs) == {\"dir1\", \"dir2\"}\n    assert set(new_files) == {\"file1\", \"file2\", omit_dir}\n\n\ndef test_ignore_complex(tmp_dir, dvc):\n    from dvc.fs import localfs\n\n    spec = \"\"\"\\\n# Ignore everything\n1/**\n# Except directories (leaves all files ignored)\n!1/**/\n# Don't ignore files in 3\n!seq/**/3/**\n\ndata/\n!data/keep.csv\n\ndata2/**\n!data2/**/\n!data2/**/*.csv\n\nignore.txt\n!no-ignore.txt\n\"\"\"\n    (tmp_dir / \".dvcignore\").write_text(spec)\n    (tmp_dir / \"1\" / \"2\" / \"3\").mkdir(parents=True, exist_ok=True)\n    (tmp_dir / \"1\" / \"2\" / \"shouldIgnore.txt\").touch()\n    (tmp_dir / \"1\" / \"2\" / \"3\" / \"shouldKeep.txt\").touch()\n    (tmp_dir / \"data\" / \"subdir\").mkdir(parents=True, exist_ok=True)\n    (tmp_dir / \"data2\" / \"subdir\").mkdir(parents=True, exist_ok=True)\n    (tmp_dir / \"data\" / \"keep.csv\").touch()\n    (tmp_dir / \"data\" / \"other.csv\").touch()\n    (tmp_dir / \"data\" / \"subdir\" / \"file.txt\").touch()\n    (tmp_dir / \"data2\" / \"keep.csv\").touch()\n    (tmp_dir / \"data2\" / \"other.txt\").touch()\n    (tmp_dir / \"data2\" / \"subdir\" / \"keep.csv\").touch()\n    (tmp_dir / \"data2\" / \"subdir\" / \"other.txt\").touch()\n    (tmp_dir / \"ignore.txt\").touch()\n    (tmp_dir / \"no-ignore.txt\").touch()\n\n    ignore_file = DvcIgnorePatterns.from_file(\n        os.fspath(tmp_dir / \".dvcignore\"), localfs, \".dvcignore\"\n    )\n    dvc.__dict__.pop(\"dvcignore\", None)\n\n    def matches(path):\n        result, _matches = ignore_file.matches(\n            os.fspath(tmp_dir), path, (tmp_dir / path).is_dir(), details=True\n        )\n        return result, [str(m) 
for m in _matches]\n\n    for path, *expected in [\n        (\"1\", False, [\".dvcignore:4:!1/**/\"]),\n        (join(\"1\", \"\"), False, [\".dvcignore:4:!1/**/\"]),\n        (join(\"1\", \"2\"), False, [\".dvcignore:4:!1/**/\"]),\n        (join(\"1\", \"2\", \"\"), False, [\".dvcignore:4:!1/**/\"]),\n        (join(\"1\", \"2\", \"shouldIgnore.txt\"), True, [\".dvcignore:2:1/**\"]),\n        (join(\"1\", \"2\", \"3\"), False, [\".dvcignore:4:!1/**/\"]),\n        (join(\"1\", \"2\", \"3\", \"\"), False, [\".dvcignore:4:!1/**/\"]),\n        (join(\"1\", \"2\", \"3\", \"shouldKeep.txt\"), True, [\".dvcignore:2:1/**\"]),\n        (\"data\", True, [\".dvcignore:8:data/\"]),\n        (join(\"data\", \"\"), True, [\".dvcignore:8:data/\"]),\n        (join(\"data\", \"keep.csv\"), True, [\".dvcignore:8:data/\"]),\n        (join(\"data\", \"other.csv\"), True, [\".dvcignore:8:data/\"]),\n        (join(\"data\", \"subdir\", \"file.txt\"), True, [\".dvcignore:8:data/\"]),\n        (\"data2\", False, [\".dvcignore:12:!data2/**/\"]),\n        (join(\"data2\", \"\"), False, [\".dvcignore:12:!data2/**/\"]),\n        (join(\"data2\", \"keep.csv\"), False, [\".dvcignore:13:!data2/**/*.csv\"]),\n        (join(\"data2\", \"other.txt\"), True, [\".dvcignore:11:data2/**\"]),\n        (join(\"data2\", \"subdir\"), False, [\".dvcignore:12:!data2/**/\"]),\n        (join(\"data2\", \"subdir\", \"\"), False, [\".dvcignore:12:!data2/**/\"]),\n        (join(\"data2\", \"subdir\", \"keep.csv\"), False, [\".dvcignore:13:!data2/**/*.csv\"]),\n        (join(\"data2\", \"subdir\", \"other.txt\"), True, [\".dvcignore:11:data2/**\"]),\n        (\"ignore.txt\", True, [\".dvcignore:15:ignore.txt\"]),\n        (\"no-ignore.txt\", False, [\".dvcignore:16:!no-ignore.txt\"]),\n    ]:\n        assert matches(path) == tuple(expected), f\"for {path}\"\n\n    def sorted_walk(path):\n        for root, dirs, files in dvc.dvcignore.walk(localfs, path):\n            dirs.sort()\n            files.sort()\n         
   yield root, dirs, files\n\n    assert list(sorted_walk(os.curdir)) == [\n        (\n            os.curdir,\n            [\"1\", \"data2\"],\n            [\".dvcignore\", \"no-ignore.txt\"],\n        ),\n        (\"1\", [\"2\"], []),\n        (join(\"1\", \"2\"), [\"3\"], []),\n        (join(\"1\", \"2\", \"3\"), [], []),\n        (\"data2\", [\"subdir\"], [\"keep.csv\"]),\n        (join(\"data2\", \"subdir\"), [], [\"keep.csv\"]),\n    ]\n\n\ndef test_ignore_unignore_from_git_example(tmp_dir, dvc, scm):\n    from dvc.fs import localfs\n\n    spec = \"\"\"\\\n# exclude everything except directory foo/bar\n/*\n!/foo\n/foo/*\n!/foo/bar\n\"\"\"\n    (tmp_dir / \".dvcignore\").write_text(spec)\n    for d in [\n        tmp_dir,\n        tmp_dir / \"foo\",\n        tmp_dir / \"foo\" / \"bar\",\n        tmp_dir / \"foo\" / \"baz\",\n        tmp_dir / \"foobar\",\n    ]:\n        d.mkdir(parents=True, exist_ok=True)\n        (d / \"myfile\").touch()\n\n    ignore_file = DvcIgnorePatterns.from_file(\n        os.fspath(tmp_dir / \".dvcignore\"), localfs, \".dvcignore\"\n    )\n    dvc.__dict__.pop(\"dvcignore\", None)\n\n    def matches(path):\n        result, _matches = ignore_file.matches(\n            os.fspath(tmp_dir), path, (tmp_dir / path).is_dir(), details=True\n        )\n        return result, [str(m) for m in _matches]\n\n    for path, *expected in [\n        (\"foo\", False, [\".dvcignore:3:!/foo\"]),\n        (join(\"foo\", \"\"), False, [\".dvcignore:3:!/foo\"]),\n        (join(\"foo\", \"myfile\"), True, [\".dvcignore:4:/foo/*\"]),\n        (join(\"foo\", \"bar\"), False, [\".dvcignore:5:!/foo/bar\"]),\n        (join(\"foo\", \"bar\", \"\"), False, [\".dvcignore:5:!/foo/bar\"]),\n        # matching pattern differs from git for foo/bar/myfile\n        (join(\"foo\", \"bar\", \"myfile\"), False, [\".dvcignore:5:!/foo/bar\"]),\n        (join(\"foo\", \"baz\"), True, [\".dvcignore:4:/foo/*\"]),\n        (join(\"foo\", \"baz\", \"\"), True, 
[\".dvcignore:4:/foo/*\"]),\n        (join(\"foo\", \"baz\", \"myfile\"), True, [\".dvcignore:4:/foo/*\"]),\n        (join(\"foo\", \"foobar\"), True, [\".dvcignore:4:/foo/*\"]),\n        (join(\"foo\", \"foobar\", \"\"), True, [\".dvcignore:4:/foo/*\"]),\n        (join(\"foo\", \"foobar\", \"myfile\"), True, [\".dvcignore:4:/foo/*\"]),\n    ]:\n        assert matches(path) == tuple(expected), f\"for {path}\"\n\n    assert sorted(dvc.dvcignore.walk(localfs, os.curdir), key=lambda r: r[0]) == [\n        (os.curdir, [\"foo\"], []),\n        (\"foo\", [\"bar\"], []),\n        (join(\"foo\", \"bar\"), [], [\"myfile\"]),\n    ]\n"
  },
  {
    "path": "tests/unit/test_imports.py",
    "content": "import subprocess\nimport sys\n\n\ndef test_no_remote_imports():\n    remote_modules = {\n        \"boto3\",\n        \"botocore\",\n        \"google.cloud.storage\",\n        \"azure.storage.blob\",\n        \"oss2\",\n        \"pydrive2\",\n        \"paramiko\",\n        \"pyarrow\",\n    }\n\n    code = \"import dvc.cli, sys; print(' '.join(sys.modules))\"\n    res = subprocess.run(\n        [sys.executable, \"-c\", code], stdout=subprocess.PIPE, check=True\n    )\n    modules = res.stdout.decode().split()\n    assert not set(modules) & remote_modules\n"
  },
  {
    "path": "tests/unit/test_info.py",
    "content": "import os\nimport re\nimport shutil\n\nimport pytest\n\nfrom dvc.info import SUBPROJECTS, get_dvc_info\n\n# Python's version is in the shape of:\n# <major>.<minor>.<patch>[{a|b|rc}N][.postN][.devN]\n# `patch` is more than enough for the tests.\n# Refer PEP-0440 for complete regex just in-case.\nPYTHON_VERSION_REGEX = r\"Python \\d\\.\\d+\\.\\d+\\S*\"\nDVC_VERSION_REGEX = r\"\\d+\\.\\d+\\.(\\d+\\.)?.*\"\n\n\ndef find_supported_remotes(string):\n    lines = string.splitlines()\n    index = 0\n\n    for index, line in enumerate(lines):\n        if line == \"Supports:\":\n            index += 1\n            break\n    else:\n        return []\n\n    remotes = {}\n    for line in lines[index:]:\n        if not line.startswith(\"\\t\"):\n            break\n\n        remote_name, _, raw_dependencies = line.strip().strip(\",\").partition(\" \")\n        remotes[remote_name] = {\n            dependency: version\n            for dependency, _, version in [\n                dependency.partition(\" = \")\n                for dependency in raw_dependencies[1:-1].split(\", \")\n            ]\n        }\n    return remotes\n\n\n@pytest.mark.parametrize(\"scm_init\", [True, False])\ndef test_info_in_repo(scm_init, tmp_dir):\n    tmp_dir.init(scm=scm_init, dvc=True)\n    # Create `.dvc/cache`, that is needed to check supported link types.\n    os.makedirs(tmp_dir.dvc.cache.local.path)\n\n    dvc_info = get_dvc_info()\n\n    assert re.search(rf\"DVC version: {DVC_VERSION_REGEX}\", dvc_info)\n    assert re.search(f\"Platform: {PYTHON_VERSION_REGEX} on .*\", dvc_info)\n    for subproject in SUBPROJECTS:\n        assert re.search(rf\"{subproject} = .*\", dvc_info)\n\n    assert find_supported_remotes(dvc_info)\n    assert re.search(r\"Cache types: .*\", dvc_info)\n\n    if scm_init:\n        assert \"Repo: dvc, git\" in dvc_info\n    else:\n        assert \"Repo: dvc (no_scm)\" in dvc_info\n\n\ndef test_info_in_subdir(tmp_dir, scm, caplog):\n    dvc_subdir = tmp_dir / 
\"subdir\"\n    dvc_subdir.mkdir()\n\n    with dvc_subdir.chdir():\n        dvc_subdir.init(scm=False, dvc=True)\n        with dvc_subdir.dvc.config.edit() as conf:\n            del conf[\"core\"][\"no_scm\"]\n\n        dvc_info = get_dvc_info()\n\n    assert \"Repo: dvc (subdir), git\" in dvc_info\n\n\ndef test_info_in_broken_git_repo(tmp_dir, dvc, scm, caplog):\n    shutil.rmtree(dvc.scm.dir)\n    dvc_info = get_dvc_info()\n\n    assert \"Repo: dvc, git (broken)\" in dvc_info\n\n\ndef test_caches(tmp_dir, dvc, caplog):\n    dvc_info = get_dvc_info()\n\n    # Order of cache types is runtime dependent\n    assert re.search(\"Caches: local\", dvc_info)\n\n\ndef test_remotes_empty(tmp_dir, dvc, caplog):\n    # No remotes are configured\n    dvc_info = get_dvc_info()\n\n    assert \"Remotes: None\" in dvc_info\n\n\ndef test_remotes(tmp_dir, dvc, caplog):\n    tmp_dir.add_remote(name=\"server\", url=\"ssh://localhost\", default=False)\n    tmp_dir.add_remote(name=\"r1\", url=\"azure://example.com/path\", default=False)\n    tmp_dir.add_remote(name=\"r2\", url=\"remote://server/path\", default=False)\n\n    dvc_info = get_dvc_info()\n\n    assert re.search(\"Remotes: (ssh, azure|azure, ssh)\", dvc_info)\n\n\ndef test_fs_info_in_repo(tmp_dir, dvc, caplog):\n    os.makedirs(dvc.cache.local.path)\n    dvc_info = get_dvc_info()\n\n    assert re.search(r\"Cache directory: .* on .*\", dvc_info)\n    assert re.search(r\"Workspace directory: .* on .*\", dvc_info)\n\n\ndef test_info_outside_of_repo(tmp_dir, caplog):\n    dvc_info = get_dvc_info()\n\n    assert re.search(rf\"DVC version: {DVC_VERSION_REGEX}\", dvc_info)\n    assert re.search(f\"Platform: {PYTHON_VERSION_REGEX} on .*\", dvc_info)\n    assert find_supported_remotes(dvc_info)\n    assert not re.search(r\"Cache types: .*\", dvc_info)\n    assert \"Repo:\" not in dvc_info\n\n\ndef test_fs_info_outside_of_repo(tmp_dir, caplog):\n    dvc_info = get_dvc_info()\n    assert re.search(rf\"DVC version: {DVC_VERSION_REGEX}\", 
dvc_info)\n    assert re.search(f\"Platform: {PYTHON_VERSION_REGEX} on .*\", dvc_info)\n    assert find_supported_remotes(dvc_info)\n\n\ndef test_plugin_versions(tmp_dir, dvc):\n    from dvc.fs import registry\n\n    dvc_info = get_dvc_info()\n    remotes = find_supported_remotes(dvc_info)\n\n    for remote, dependencies in remotes.items():\n        assert dependencies.keys() == registry[remote].REQUIRES.keys()\n"
  },
  {
    "path": "tests/unit/test_interpolate.py",
    "content": "from math import inf, pi\n\nimport pytest\n\nfrom dvc.parsing.context import Context, recurse_not_a_node\n\n\n@pytest.mark.parametrize(\n    \"template, var\", [(\"${value}\", \"value\"), (\"${ item }\", \"item\")]\n)\n@pytest.mark.parametrize(\n    \"data\", [True, 12, pi, None, False, 0, \"0\", \"123\", \"Foobar\", \"\", inf, 3e4]\n)\ndef test_resolve_primitive_values(data, template, var):\n    context = Context({var: data})\n    assert context.resolve(template) == data\n\n\n@pytest.mark.parametrize(\n    \"template, expected\",\n    [\n        (r\"\\${value}\", \"${value}\"),\n        (r\"\\${ value }\", \"${ value }\"),\n        (r\"\\${ value } days\", \"${ value } days\"),\n        (r\"Month of \\${value}\", \"Month of ${value}\"),\n        (r\"May the \\${value} be with you\", \"May the ${value} be with you\"),\n        (\n            r\"Great shot kid, that was \\${value} in a ${value}\",\n            \"Great shot kid, that was ${value} in a value\",\n        ),\n    ],\n)\ndef test_escape(template, expected):\n    context = Context({\"value\": \"value\"})\n    assert context.resolve(template) == expected\n\n\ndef test_resolve_str():\n    template = \"My name is ${last}, ${first} ${last}\"\n    expected = \"My name is Bond, James Bond\"\n    context = Context({\"first\": \"James\", \"last\": \"Bond\"})\n    assert context.resolve(template) == expected\n\n\ndef test_resolve_primitives_dict_access():\n    data = {\n        \"dict\": {\n            \"num\": 5,\n            \"string\": \"foo\",\n            \"nested\": {\"float\": pi, \"string\": \"bar\"},\n        }\n    }\n    context = Context(data)\n\n    assert context.resolve(\"${dict.num}\") == 5\n    assert context.resolve(\"${dict.string}\") == \"foo\"\n    assert context.resolve(\"${dict.nested.float}\") == pi\n    assert context.resolve(\"${dict.nested.string}\") == \"bar\"\n\n    assert context.resolve(\"Number ${dict.num}\") == \"Number 5\"\n\n\ndef 
test_resolve_primitives_list_access():\n    context = Context(\n        {\n            \"dict\": [\n                {\"f\": \"f\"},\n                {\"fo\": \"fo\"},\n                {\"foo\": \"foo\"},\n                {\"foo\": [\"f\", \"o\", \"o\"]},\n            ]\n        }\n    )\n\n    assert context.resolve(\"${dict[0].f}\") == \"f\"\n    assert context.resolve(\"${dict[1].fo}\") == \"fo\"\n    assert context.resolve(\"${dict[2].foo}\") == \"foo\"\n    assert context.resolve(\"${dict[3].foo[0]}\") == \"f\"\n\n    assert context.resolve(\"${ dict.1.fo}${dict.3.foo.1}bar\") == \"foobar\"\n\n\ndef test_resolve_collection():\n    from tests.func.parsing import (\n        CONTEXT_DATA,\n        RESOLVED_DVC_YAML_DATA,\n        TEMPLATED_DVC_YAML_DATA,\n    )\n\n    context = Context(CONTEXT_DATA)\n    resolved = context.resolve(TEMPLATED_DVC_YAML_DATA)\n    assert resolved == RESOLVED_DVC_YAML_DATA\n    assert recurse_not_a_node(resolved)\n\n\ndef test_resolve_unicode():\n    context = Context({\"नेपाली\": {\"चिया\": [\"चि\", \"या\"]}})\n    assert context.resolve_str(\"${नेपाली.चिया[0]}${नेपाली.चिया[1]}\") == \"चिया\"\n    assert context.resolve_str(\"${नेपाली[चिया][0]}${नेपाली[चिया][1]}\") == \"चिया\"\n"
  },
  {
    "path": "tests/unit/test_lockfile.py",
    "content": "import pytest\n\nfrom dvc.dvcfile import FileIsGitIgnored, Lockfile\nfrom dvc.stage import PipelineStage\nfrom dvc.utils.strictyaml import YAMLValidationError\n\n\ndef test_stage_dump_no_outs_deps(tmp_dir, dvc):\n    stage = PipelineStage(name=\"s1\", repo=dvc, path=\"path\", cmd=\"command\")\n    lockfile = Lockfile(dvc, \"path.lock\")\n    lockfile.dump(stage)\n    assert lockfile.load() == {\n        \"schema\": \"2.0\",\n        \"stages\": {\"s1\": {\"cmd\": \"command\"}},\n    }\n\n\ndef test_stage_dump_when_already_exists(tmp_dir, dvc):\n    data = {\"s1\": {\"cmd\": \"command\", \"deps\": [], \"outs\": []}}\n    (tmp_dir / \"path.lock\").dump({\"schema\": \"2.0\", \"stages\": data})\n    stage = PipelineStage(name=\"s2\", repo=dvc, path=\"path\", cmd=\"command2\")\n    lockfile = Lockfile(dvc, \"path.lock\")\n    lockfile.dump(stage)\n    assert lockfile.load() == {\n        \"schema\": \"2.0\",\n        \"stages\": {**data, \"s2\": {\"cmd\": \"command2\"}},\n    }\n\n\ndef test_stage_dump_with_deps_and_outs(tmp_dir, dvc):\n    data = {\n        \"s1\": {\n            \"cmd\": \"command\",\n            \"deps\": [{\"md5\": \"1.txt\", \"path\": \"checksum\"}],\n            \"outs\": [{\"md5\": \"2.txt\", \"path\": \"checksum\"}],\n        }\n    }\n    (tmp_dir / \"path.lock\").dump({\"schema\": \"2.0\", \"stages\": data})\n    lockfile = Lockfile(dvc, \"path.lock\")\n    stage = PipelineStage(name=\"s2\", repo=dvc, path=\"path\", cmd=\"command2\")\n    lockfile.dump(stage)\n    assert lockfile.load() == {\n        \"schema\": \"2.0\",\n        \"stages\": {**data, \"s2\": {\"cmd\": \"command2\"}},\n    }\n\n\ndef test_stage_overwrites_if_already_exists(tmp_dir, dvc):\n    lockfile = Lockfile(dvc, \"path.lock\")\n    stage = PipelineStage(name=\"s2\", repo=dvc, path=\"path\", cmd=\"command2\")\n    lockfile.dump(stage)\n    stage = PipelineStage(name=\"s2\", repo=dvc, path=\"path\", cmd=\"command3\")\n    lockfile.dump(stage)\n    assert 
lockfile.load() == {\n        \"schema\": \"2.0\",\n        \"stages\": {\"s2\": {\"cmd\": \"command3\"}},\n    }\n\n\ndef test_load_when_lockfile_does_not_exist(tmp_dir, dvc):\n    assert Lockfile(dvc, \"pipelines.lock\").load() == {}\n\n\n@pytest.mark.parametrize(\n    \"corrupt_data\",\n    [\n        {\"s1\": {\"outs\": []}},\n        {\"s1\": {}},\n        {\n            \"s1\": {\n                \"cmd\": \"command\",\n                \"outs\": [{\"md5\": \"checksum\", \"path\": \"path\", \"random\": \"value\"}],\n            }\n        },\n        {\"s1\": {\"cmd\": \"command\", \"deps\": [{\"md5\": \"checksum\"}]}},\n    ],\n)\ndef test_load_when_lockfile_is_corrupted(tmp_dir, dvc, corrupt_data):\n    (tmp_dir / \"Dvcfile.lock\").dump(corrupt_data)\n    lockfile = Lockfile(dvc, \"Dvcfile.lock\")\n    with pytest.raises(YAMLValidationError) as exc_info:\n        lockfile.load()\n    assert \"Dvcfile.lock\" in str(exc_info.value)\n\n\n@pytest.mark.parametrize(\"dvcignored\", [True, False])\n@pytest.mark.parametrize(\"file_exists\", [True, False])\ndef test_try_loading_lockfile_that_is_gitignored(\n    tmp_dir, dvc, scm, dvcignored, file_exists\n):\n    # it should raise error if the file is git-ignored, even if:\n    #   1. The file does not exist at all.\n    #   2. Or, is dvc-ignored.\n    files = [\".gitignore\"]\n    if dvcignored:\n        files.append(\".dvcignore\")\n\n    for file in files:\n        with (tmp_dir / file).open(mode=\"a+\") as fd:\n            fd.write(\"dvc.lock\")\n\n    if file_exists:\n        (tmp_dir / \"dvc.lock\").write_text(\"\")\n\n    scm._reset()\n\n    with pytest.raises(FileIsGitIgnored) as exc_info:\n        Lockfile(dvc, \"dvc.lock\").load()\n\n    assert str(exc_info.value) == \"'dvc.lock' is git-ignored.\"\n"
  },
  {
    "path": "tests/unit/test_logger.py",
    "content": "import logging\nimport traceback\n\nimport colorama\nimport pytest\n\nimport dvc.logger\nfrom dvc.exceptions import DvcException\n\nlogger = logging.getLogger(\"dvc\")\nformatter = dvc.logger.ColorFormatter()\ncolors = {\n    \"blue\": colorama.Fore.BLUE,\n    \"green\": colorama.Fore.GREEN,\n    \"red\": colorama.Fore.RED,\n    \"yellow\": colorama.Fore.YELLOW,\n    \"nc\": colorama.Fore.RESET,\n}\n\n\n@pytest.fixture\ndef dt(mocker):\n    dt_str = \"2020-02-02 00:00:00,000\"\n    mocker.patch.object(formatter, \"formatTime\", return_value=dt_str)\n    return dt_str\n\n\nclass TestColorFormatter:\n    def test_debug(self, caplog, dt):\n        with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n            logger.debug(\"message\")\n\n            expected = \"{blue}{datetime}{nc} {blue}DEBUG{nc}: message\".format(\n                **colors, datetime=dt\n            )\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_info(self, caplog):\n        with caplog.at_level(logging.INFO, logger=\"dvc\"):\n            logger.info(\"message\")\n\n            assert formatter.format(caplog.records[0]) == \"message\"\n\n    def test_warning(self, caplog):\n        with caplog.at_level(logging.INFO, logger=\"dvc\"):\n            logger.warning(\"message\")\n\n            expected = \"{yellow}WARNING{nc}: message\".format(**colors)\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_error(self, caplog):\n        with caplog.at_level(logging.INFO, logger=\"dvc\"):\n            logger.error(\"message\")\n\n            expected = \"{red}ERROR{nc}: message\".format(**colors)\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_exception(self, caplog):\n        with caplog.at_level(logging.INFO, logger=\"dvc\"):\n            try:\n                raise ValueError\n            except Exception:\n                logger.exception(\"message\")\n\n            expected = 
\"{red}ERROR{nc}: message\".format(**colors)\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_exception_with_description_and_without_message(self, caplog):\n        with caplog.at_level(logging.INFO, logger=\"dvc\"):\n            try:\n                raise Exception(\"description\")\n            except Exception:\n                logger.exception(\"\")\n\n            expected = \"{red}ERROR{nc}: description\".format(**colors)\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_exception_with_description_and_message(self, caplog):\n        with caplog.at_level(logging.INFO, logger=\"dvc\"):\n            try:\n                raise Exception(\"description\")\n            except Exception:\n                logger.exception(\"message\")\n\n            expected = \"{red}ERROR{nc}: message - description\".format(**colors)\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_exception_under_verbose(self, caplog, dt):\n        with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n            try:\n                raise Exception(\"description\")\n            except Exception:\n                stack_trace = traceback.format_exc()\n                logger.exception(\"\")\n\n            expected = (\n                \"{red}{datetime}{nc} {red}ERROR{nc}: description\\n{stack_trace}\".format(\n                    stack_trace=stack_trace,\n                    **colors,\n                    datetime=dt,\n                )\n            )\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_exc_info_on_other_record_types(self, caplog, dt):\n        with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n            try:\n                raise Exception(\"description\")\n            except Exception:\n                stack_trace = traceback.format_exc()\n                logger.debug(\"\", exc_info=True)\n\n            expected = (\n                
\"{blue}{datetime}{nc} \"\n                \"{blue}DEBUG{nc}: description\\n\"\n                \"{stack_trace}\".format(\n                    stack_trace=stack_trace,\n                    datetime=dt,\n                    **colors,\n                )\n            )\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_tb_only(self, caplog, dt):\n        with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n            try:\n                raise Exception(\"description\")\n            except Exception:\n                stack_trace = traceback.format_exc()\n                logger.exception(\"something\", extra={\"tb_only\": True})\n\n            expected = (\n                \"{red}{datetime}{nc} {red}ERROR{nc}: something\\n{stack_trace}\".format(\n                    stack_trace=stack_trace,\n                    **colors,\n                    datetime=dt,\n                )\n            )\n\n            assert expected == formatter.format(caplog.records[0])\n\n    def test_nested_exceptions(self, caplog, dt):\n        with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n            try:\n                raise Exception(\"first\")\n            except Exception as exc:  # noqa: BLE001\n                try:\n                    raise DvcException(\"second\") from exc\n                except DvcException:\n                    stack_trace = traceback.format_exc()\n                    logger.exception(\"message\")\n\n            expected = (\n                \"{red}{datetime}{nc} \"\n                \"{red}ERROR{nc}: message - second: first\\n\"\n                \"{stack_trace}\".format(\n                    stack_trace=stack_trace,\n                    **colors,\n                    datetime=dt,\n                )\n            )\n            assert expected == formatter.format(caplog.records[0])\n            assert \"Exception: first\" in stack_trace\n            assert \"dvc.exceptions.DvcException: second\" in stack_trace\n\n    def 
test_progress_awareness(self, mocker, capsys, caplog):\n        from dvc.progress import Tqdm\n\n        mocker.patch(\"sys.stdout.isatty\", return_value=True)\n        with Tqdm(total=100, desc=\"progress\") as pbar:\n            pbar.update()\n\n            # logging an invisible message should not break\n            # the progress bar output\n            with caplog.at_level(logging.INFO, logger=\"dvc\"):\n                debug_record = logging.LogRecord(\n                    name=\"dvc\",\n                    level=logging.DEBUG,\n                    pathname=__name__,\n                    lineno=1,\n                    msg=\"debug\",\n                    args=(),\n                    exc_info=None,\n                )\n\n                formatter.format(debug_record)\n                captured = capsys.readouterr()\n                assert not captured.out\n\n            #  when the message is actually visible\n            with caplog.at_level(logging.INFO, logger=\"dvc\"):\n                logger.info(\"some info\")\n                captured = capsys.readouterr()\n                assert not captured.out\n\n\ndef test_handlers():\n    out, deb, vrb, err = logger.handlers\n\n    assert out.level == logging.INFO\n    assert deb.level == logging.DEBUG\n    assert vrb.level == logging.TRACE\n    assert err.level == logging.WARNING\n\n\ndef test_logging_debug_with_datetime(caplog, dt):\n    with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n        logger.warning(\"WARNING\")\n        logger.debug(\"DEBUG\")\n        logger.trace(\"TRACE\")\n        logger.error(\"ERROR\")\n\n        for record in caplog.records:\n            assert dt in formatter.format(record)\n            assert record.levelname == record.message\n\n\ndef test_info_with_debug_loglevel_shows_no_datetime(caplog, dt):\n    with caplog.at_level(logging.DEBUG, logger=\"dvc\"):\n        logger.info(\"message\")\n\n        assert formatter.format(caplog.records[0]) == \"message\"\n\n\ndef 
test_add_existing_level(caplog, dt):\n    # Common pattern to configure logging level in external libraries\n    # eg:\n    # https://github.com/bokeh/bokeh/blob/04bb30fef2e72e64baaa8b2f330806d5bfdd3b11/\n    # bokeh/util/logconfig.py#L79-L85\n    TRACE2 = 4  # noqa: N806\n    logging.addLevelName(TRACE2, \"TRACE2\")\n    logging.TRACE2 = TRACE2\n\n    dvc.logger.add_logging_level(\"TRACE2\", 2)\n\n    # DVC sets all expected entrypoints, but doesn't override the level\n    assert logging.TRACE2 == 4\n    assert hasattr(logging, \"trace2\")\n    assert hasattr(logger, \"trace2\")\n    assert logging.getLevelName(\"TRACE2\") == 4\n\n    # The TRACE2 logging level uses the original, higher logging level\n    with caplog.at_level(logging.TRACE2, logger=\"dvc\"):\n        logger.trace2(\"TRACE2\")\n    assert len(caplog.records) == 1\n\n    (record,) = caplog.records\n    assert record.levelno == 4\n    assert record.levelname == \"TRACE2\"\n    assert record.message == \"TRACE2\"\n"
  },
  {
    "path": "tests/unit/test_metrics.py",
    "content": "import json\nimport os\n\n\ndef test_metrics_order(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"p.json\": json.dumps({\"p1\": 1}),\n            \"p1.json\": json.dumps({\"p2\": 1}),\n            \"sub\": {\n                \"p3.json\": json.dumps({\"p3\": 1}),\n                \"p4.json\": json.dumps({\"p4\": 1}),\n            },\n        }\n    )\n\n    dvc.stage.add(\n        metrics=[\"p.json\", str(tmp_dir / \"sub\" / \"p4.json\")],\n        cmd=\"cmd1\",\n        name=\"stage1\",\n    )\n    with (tmp_dir / \"sub\").chdir():\n        dvc.stage.add(\n            metrics=[str(tmp_dir / \"p1.json\"), \"p3.json\"],\n            cmd=\"cmd2\",\n            name=\"stage2\",\n        )\n\n    assert list(dvc.metrics.show()[\"\"][\"data\"]) == [\n        \"p.json\",\n        os.path.join(\"sub\", \"p4.json\"),\n        \"p1.json\",\n        os.path.join(\"sub\", \"p3.json\"),\n    ]\n"
  },
  {
    "path": "tests/unit/test_params.py",
    "content": "import os\n\nfrom dvc.utils.serialize import dumps_yaml\n\n\ndef test_params_order(tmp_dir, dvc):\n    tmp_dir.gen(\n        {\n            \"params.yaml\": dumps_yaml({\"p\": 1}),\n            \"params1.yaml\": dumps_yaml({\"p1\": 1}),\n            \"sub\": {\"params2.yaml\": dumps_yaml({\"p2\": 1})},\n        }\n    )\n\n    params_path = os.path.join(\"..\", \"params.yaml\")\n    p2_path = os.path.join(\"sub\", \"params2.yaml\")\n    dvc.stage.add(\n        params=[{p2_path: [\"p2\"]}, {\"params1.yaml\": [\"p1\"]}],\n        cmd=\"cmd1\",\n        name=\"stage1\",\n    )\n    with (tmp_dir / \"sub\").chdir():\n        dvc.stage.add(params=[{params_path: [\"p\"]}], cmd=\"cmd2\", name=\"stage2\")\n\n    # params are sorted during dumping, therefore p1 is first\n    assert list(dvc.params.show()[\"\"][\"data\"]) == [\n        \"params1.yaml\",\n        p2_path,\n        \"params.yaml\",\n    ]\n\n\ndef test_repro_unicode(tmp_dir, dvc):\n    tmp_dir.gen({\"settings.json\": '{\"Ω_value\": 1}'})\n    stage = dvc.stage.add(\n        params=[{\"settings.json\": [\"Ω_value\"]}], cmd=\"cmd\", name=\"stage1\"\n    )\n    assert dvc.reproduce(dry=True) == [stage]\n\n    stage.cmd = \"foo\"\n    stage.dump()\n\n    dvc.remove(stage.name)\n    assert not (tmp_dir / \"dvc.yaml\").exists()\n    assert not (tmp_dir / \"dvc.lock\").exists()\n"
  },
  {
    "path": "tests/unit/test_pathspec_math.py",
    "content": "import pytest\n\nfrom dvc.pathspec_math import PatternInfo, _change_dirname\n\n\n@pytest.mark.parametrize(\n    \"patterns, dirname, changed\",\n    [\n        # A line starting with # serves as a comment.\n        (\"#comment\", \"/dir\", \"#comment\"),\n        # Put a backslash (\"\\\") in front of the first hash for patterns that\n        # begin with a hash.\n        (\"\\\\#hash\", \"/dir\", \"dir/**/#hash\"),\n        (\"\\\\#hash\", \"/#dir\", \"#dir/**/#hash\"),\n        # Trailing spaces are ignored unless they are quoted with\n        # backslash (\"\\\").\n        (\" space\", \"/dir\", \"dir/**/space\"),\n        (\"\\\\ space\", \"/dir\", \"dir/**/ space\"),\n        # An optional prefix \"!\" which negates the pattern;\n        (\"!include\", \"/dir\", \"!/dir/**/include\"),\n        # Put a backslash (\"\\\") in front of the first \"!\" for patterns that\n        # begin with a literal \"!\", for example, \"\\!important!.txt\".\n        (\"\\\\!important!.txt\", \"/dir\", \"dir/**/!important!.txt\"),\n        # If there is a separator at the beginning or middle (or both) of the\n        # pattern, then the pattern is relative to the directory level of the\n        # particular .gitignore file itself.\n        (\"/separator.txt\", \"/dir\", \"dir/separator.txt\"),\n        (\"subdir/separator.txt\", \"/dir\", \"dir/subdir/separator.txt\"),\n        # Otherwise the pattern may also match at any level below\n        # the .gitignore level.\n        (\"no_sep\", \"/dir\", \"dir/**/no_sep\"),\n        # If there is a separator at the end of the pattern then the pattern\n        # will only match directories, otherwise the pattern can match both\n        # files and directories.\n        (\"doc/fortz/\", \"/dir\", \"dir/doc/fortz/\"),\n        (\"fortz/\", \"/dir\", \"dir/**/fortz/\"),\n        # An asterisk \"*\" matches anything except a slash.\n        (\"*aste*risk*\", \"/dir\", \"dir/**/*aste*risk*\"),\n        # The character \"?\" 
matches any one character except \"/\".\n        (\"?fi?le?\", \"/dir\", \"dir/**/?fi?le?\"),\n        # The range notation, e.g. [a-zA-Z], can be used to match one of the\n        # characters in a range. See fnmatch(3) and the FNM_PATHNAME flag\n        # for a more detailed description.\n        (\"[a-zA-Z]file[a-zA-Z]\", \"/dir\", \"dir/**/[a-zA-Z]file[a-zA-Z]\"),\n        # Two consecutive asterisks (\"**\") in patterns matched against full\n        # pathname may have special meaning:\n        # A leading \"**\" followed by a slash means match in all directories.\n        # For example, \"**/foo\" matches file or directory \"foo\" anywhere,\n        # the same as pattern \"foo\".\n        (\"**/foo\", \"/dir\", \"dir/**/foo\"),\n        # \"**/foo/bar\" matches file or directory \"bar\" anywhere that is\n        # directly under directory \"foo\".\n        (\"**/foo/bar\", \"/dir\", \"dir/**/foo/bar\"),\n        # A trailing \"/**\" matches everything inside.\n        # For example, \"abc/**\" matches all files inside directory \"abc\",\n        # relative to the location of the .gitignore file, with infinite depth.\n        (\"abc/**\", \"/dir\", \"dir/abc/**\"),\n        # A slash followed by two consecutive asterisks then a slash matches\n        # zero or more directories. 
For example, \"a/**/b\"\n        # matches \"a/b\", \"a/x/b\", \"a/x/y/b\" and so on.\n        (\"a/**/b\", \"/dir\", \"dir/a/**/b\"),\n        # Other consecutive asterisks are considered regular asterisks and\n        # will match according to the previous rules.\n        (\"/***.txt\", \"/dir\", \"dir/***.txt\"),\n        (\"data/***\", \"/dir\", \"dir/data/***\"),\n        (\"***/file.txt\", \"/dir\", \"dir/***/file.txt\"),\n        (\"***file\", \"/dir\", \"dir/**/***file\"),\n        (\"a/***/b\", \"/dir\", \"dir/a/***/b\"),\n    ],\n)\ndef test_dvcignore_pattern_change_dir(tmp_dir, patterns, dirname, changed):\n    assert _change_dirname(dirname, [PatternInfo(patterns, \"\")], \"/\") == [\n        PatternInfo(changed, \"\")\n    ]\n"
  },
  {
    "path": "tests/unit/test_progress.py",
    "content": "import logging\n\nfrom dvc.progress import Tqdm\nfrom dvc.utils import env2bool\n\n\ndef test_quiet_logging(caplog, capsys):\n    with caplog.at_level(logging.CRITICAL, logger=\"dvc\"):\n        for _ in Tqdm(range(10)):\n            pass\n        out_err = capsys.readouterr()\n        assert not out_err.out\n        assert not out_err.err\n\n\ndef test_quiet_logging_disable_false(caplog, capsys, mocker):\n    # simulate interactive terminal\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n    with caplog.at_level(logging.CRITICAL, logger=\"dvc\"):\n        for _ in Tqdm(range(10), disable=False):\n            pass\n        out_err = capsys.readouterr()\n        assert not out_err.out\n        assert not out_err.err\n\n\ndef test_quiet_notty(caplog, capsys):\n    with caplog.at_level(logging.INFO, logger=\"dvc\"):\n        for _ in Tqdm(range(10)):\n            pass\n        out_err = capsys.readouterr()\n        assert not out_err.out\n        if env2bool(\"DVC_IGNORE_ISATTY\"):\n            assert \"0/10\" in out_err.err\n        else:\n            assert not out_err.err\n\n\ndef test_default(caplog, capsys, mocker):\n    # simulate interactive terminal\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n    with caplog.at_level(logging.INFO, logger=\"dvc\"):\n        for _ in Tqdm(range(10)):\n            pass\n\n        out_err = capsys.readouterr()\n        assert not out_err.out\n        assert \"0/10\" in out_err.err\n"
  },
  {
    "path": "tests/unit/test_prompt.py",
    "content": "from dvc.prompt import confirm\n\n\ndef test_confirm_in_tty_if_stdin_is_closed(mocker):\n    mock_input = mocker.patch(\"dvc.prompt.input\", side_effect=EOFError)\n    mock_isatty = mocker.patch(\"sys.stdout.isatty\", return_value=True)\n    ret = confirm(\"message\")\n    mock_isatty.assert_called()\n    mock_input.assert_called()\n    assert not ret\n"
  },
  {
    "path": "tests/unit/test_run.py",
    "content": "import pytest\n\nfrom dvc.stage.utils import is_valid_name\n\n\n@pytest.mark.parametrize(\"name\", [\"copy_name\", \"copy-name\", \"copyName\", \"12\"])\ndef test_valid_stage_names(name):\n    assert is_valid_name(name)\n\n\n@pytest.mark.parametrize(\"name\", [\"copy$name\", \"copy-name?\", \"copy-name@v1\"])\ndef test_invalid_stage_names(name):\n    assert not is_valid_name(name)\n"
  },
  {
    "path": "tests/unit/test_rwlock.py",
    "content": "import json\nimport os\n\nimport pytest\n\nfrom dvc.fs import localfs\nfrom dvc.lock import LockError\nfrom dvc.rwlock import (\n    RWLockFileCorruptedError,\n    RWLockFileFormatError,\n    _edit_rwlock,\n    rwlock,\n)\n\n\ndef test_rwlock(tmp_path):\n    path = os.fspath(tmp_path)\n    foo = \"foo\"\n\n    with rwlock(path, localfs, \"cmd1\", [foo], [], False):\n        with pytest.raises(LockError):\n            with rwlock(path, localfs, \"cmd2\", [], [foo], False):\n                pass\n\n    with rwlock(path, localfs, \"cmd1\", [], [foo], False):\n        with pytest.raises(LockError):\n            with rwlock(path, localfs, \"cmd2\", [foo], [], False):\n                pass\n\n    with rwlock(path, localfs, \"cmd1\", [], [foo], False):\n        with pytest.raises(LockError):\n            with rwlock(path, localfs, \"cmd2\", [], [foo], False):\n                pass\n\n\ndef test_rwlock_reentrant(tmp_path):\n    path = os.fspath(tmp_path)\n    foo = \"foo\"\n\n    with rwlock(path, localfs, \"cmd1\", [], [foo], False):\n        with rwlock(path, localfs, \"cmd1\", [], [foo], False):\n            pass\n        with _edit_rwlock(path, localfs, False) as lock:\n            assert lock == {\n                \"read\": {},\n                \"write\": {\"foo\": {\"cmd\": \"cmd1\", \"pid\": os.getpid()}},\n            }\n\n    with rwlock(path, localfs, \"cmd\", [foo], [], False):\n        with rwlock(path, localfs, \"cmd\", [foo], [], False):\n            pass\n        with _edit_rwlock(path, localfs, False) as lock:\n            assert lock == {\n                \"read\": {\"foo\": [{\"cmd\": \"cmd\", \"pid\": os.getpid()}]},\n                \"write\": {},\n            }\n\n\ndef test_rwlock_edit_is_guarded(tmp_path, mocker):\n    # patching to speedup tests\n    mocker.patch(\"dvc.lock.DEFAULT_TIMEOUT\", 0.01)\n\n    path = os.fspath(tmp_path)\n\n    with _edit_rwlock(path, localfs, False):\n        with pytest.raises(LockError):\n            
with _edit_rwlock(path, localfs, False):\n                pass\n\n\ndef test_rwlock_subdirs(tmp_path):\n    path = os.fspath(tmp_path)\n    foo = \"foo\"\n    subfoo = os.path.join(\"foo\", \"subfoo\")\n\n    with rwlock(path, localfs, \"cmd1\", [foo], [], False):\n        with pytest.raises(LockError, match=r\"subfoo(.|\\n)*cmd1\"):\n            with rwlock(path, localfs, \"cmd2\", [], [subfoo], False):\n                pass\n\n    with rwlock(path, localfs, \"cmd1\", [], [subfoo], False):\n        with pytest.raises(LockError, match=r\"'foo'(.|\\n)*cmd1\"):\n            with rwlock(path, localfs, \"cmd2\", [foo], [], False):\n                pass\n\n    with rwlock(path, localfs, \"cmd1\", [], [subfoo], False):\n        with pytest.raises(LockError):\n            with rwlock(path, localfs, \"cmd2\", [], [foo], False):\n                pass\n\n    with rwlock(path, localfs, \"cmd1\", [subfoo], [], False):\n        with rwlock(path, localfs, \"cmd2\", [foo], [], False):\n            pass\n\n\ndef test_broken_rwlock(tmp_path):\n    dir_path = os.fspath(tmp_path)\n    path = tmp_path / \"rwlock\"\n\n    path.write_text('{\"broken\": \"format\"}', encoding=\"utf-8\")\n    with pytest.raises(RWLockFileFormatError):\n        with _edit_rwlock(dir_path, localfs, False):\n            pass\n\n    path.write_text(\"{broken json\", encoding=\"utf-8\")\n    with pytest.raises(RWLockFileCorruptedError):\n        with _edit_rwlock(dir_path, localfs, False):\n            pass\n\n\n@pytest.mark.parametrize(\"return_value\", [True, False])\ndef test_corrupted_rwlock(tmp_path, mocker, return_value):\n    dir_path = os.fspath(tmp_path)\n    path = tmp_path / \"rwlock\"\n\n    foo = \"foo\"\n    bar = \"bar\"\n    cmd_foo = \"cmd_foo\"\n    cmd_bar = \"cmd_bar\"\n    mocker.patch(\"psutil.pid_exists\", return_value=return_value)\n\n    corrupted_rwlock = {\n        \"write\": {foo: {\"pid\": 1234, \"cmd\": cmd_foo}},\n        \"read\": {\n            foo: [{\"pid\": 5555, \"cmd\": 
cmd_foo}],\n            bar: [\n                {\"pid\": 6666, \"cmd\": cmd_bar},\n                {\"pid\": 7777, \"cmd\": cmd_bar},\n            ],\n        },\n    }\n\n    path.write_text(json.dumps(corrupted_rwlock), encoding=\"utf-8\")\n\n    if return_value:\n        with pytest.raises(LockError):\n            with rwlock(dir_path, localfs, \"cmd_other\", [], [foo, bar], False):\n                pass\n    else:\n        with rwlock(dir_path, localfs, \"cmd_other\", [], [foo, bar], False):\n            pass\n        assert path.read_text() == \"\"\"{\"read\": {}}\"\"\"\n"
  },
  {
    "path": "tests/unit/test_scm.py",
    "content": "import pytest\n\nfrom dvc.exceptions import DvcException\nfrom dvc.scm import resolve_rev\n\n\ndef test_resolve_rev_empty_git_repo(scm):\n    with pytest.raises(DvcException, match=\"unknown Git revision 'HEAD'\"):\n        resolve_rev(scm, \"HEAD\")\n"
  },
  {
    "path": "tests/unit/test_tabular_data.py",
    "content": "import re\n\nimport pytest\n\nfrom dvc.compare import TabularData\n\n\ndef test_table_empty(capsys):\n    td = TabularData([\"Col1\", \"Col2\", \"Col3\"])\n    assert dict(td.items()) == {\"Col1\": [], \"Col2\": [], \"Col3\": []}\n    assert td.columns == [[], [], []]\n    assert td.keys() == [\"Col1\", \"Col2\", \"Col3\"]\n    assert list(td) == []\n    assert td.Col1 == []\n    assert td.Col2 == []\n    assert td.Col3 == []\n\n    assert td[1:] == []\n    with pytest.raises(IndexError):\n        _ = td[1]\n\n    assert len(td) == 0\n    assert td.shape == (3, 0)\n    assert td.to_csv() == \"\"\"Col1,Col2,Col3\\r\\n\"\"\"\n\n    td.render()\n    assert capsys.readouterr() == (\"\", \"\")\n\n    td.render(rich_table=True)\n    assert capsys.readouterr() == (\"\", \"\")\n\n    td.render(markdown=True)\n    assert capsys.readouterr() == (\n        \"| Col1   | Col2   | Col3   |\\n|--------|--------|--------|\\n\\n\",\n        \"\",\n    )\n\n    td.rename(\"Col1\", \"Col11\")\n    assert td.keys() == [\"Col11\", \"Col2\", \"Col3\"]\n\n    td.project(\"Col3\", \"Col11\")\n    assert td.keys() == [\"Col3\", \"Col11\"]\n\n\ndef test_list_operations():\n    td = TabularData([\"col1\", \"col2\", \"col3\"])\n    td.append([\"1\", \"2\", \"3\"])\n\n    assert list(td) == [[\"1\", \"2\", \"3\"]]\n    td.extend(([\"11\", \"12\", \"13\"], [\"21\", \"22\", \"23\"]))\n    assert list(td) == [\n        [\"1\", \"2\", \"3\"],\n        [\"11\", \"12\", \"13\"],\n        [\"21\", \"22\", \"23\"],\n    ]\n    td.insert(1, [\"01\", \"02\", \"03\"])\n    assert list(td) == [\n        [\"1\", \"2\", \"3\"],\n        [\"01\", \"02\", \"03\"],\n        [\"11\", \"12\", \"13\"],\n        [\"21\", \"22\", \"23\"],\n    ]\n    assert td.shape == (3, 4)\n    assert len(td) == 4\n    assert td[1] == [\"01\", \"02\", \"03\"]\n    assert td[1:] == [\n        [\"01\", \"02\", \"03\"],\n        [\"11\", \"12\", \"13\"],\n        [\"21\", \"22\", \"23\"],\n    ]\n    assert td[::-1] 
== [\n        [\"21\", \"22\", \"23\"],\n        [\"11\", \"12\", \"13\"],\n        [\"01\", \"02\", \"03\"],\n        [\"1\", \"2\", \"3\"],\n    ]\n    del td[1]\n    assert list(td) == [\n        [\"1\", \"2\", \"3\"],\n        [\"11\", \"12\", \"13\"],\n        [\"21\", \"22\", \"23\"],\n    ]\n    assert td.shape == (3, 3)\n    td[1:3] = [[\"51\", \"52\", \"53\"], [\"61\", \"62\", \"63\"]]\n    assert list(td) == [\n        [\"1\", \"2\", \"3\"],\n        [\"51\", \"52\", \"53\"],\n        [\"61\", \"62\", \"63\"],\n    ]\n    td[1] = [\"41\", \"42\", \"43\"]\n    assert td[1] == [\"41\", \"42\", \"43\"]\n\n    del td[1:3]\n    assert td.shape == (3, 1)\n\n    assert td.to_csv() == \"col1,col2,col3\\r\\n1,2,3\\r\\n\"\n\n\ndef test_dict_like_interfaces():\n    td = TabularData([\"col-1\", \"col-2\"])\n\n    td.extend([[\"foo\", \"bar\"], [\"foobar\", \"foobar\"]])\n    assert td.keys() == [\"col-1\", \"col-2\"]\n    assert dict(td.items()) == {\n        \"col-1\": [\"foo\", \"foobar\"],\n        \"col-2\": [\"bar\", \"foobar\"],\n    }\n    assert td.as_dict() == [\n        {\"col-1\": \"foo\", \"col-2\": \"bar\"},\n        {\"col-1\": \"foobar\", \"col-2\": \"foobar\"},\n    ]\n    assert td.as_dict([\"col-1\"]) == [{\"col-1\": \"foo\"}, {\"col-1\": \"foobar\"}]\n\n\ndef test_fill_value():\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\"], fill_value=\"?\")\n    td.append([\"foo\"])\n    assert list(td) == [[\"foo\", \"?\", \"?\"]]\n\n    td.extend(\n        [\n            [\"bar\"],\n            [\"foobar\", \"foobar2\"],\n            [\"f\", \"fo\", \"foo\", \"foob\", \"fooba\", \"foobar\"],\n        ]\n    )\n    assert list(td) == [\n        [\"foo\", \"?\", \"?\"],\n        [\"bar\", \"?\", \"?\"],\n        [\"foobar\", \"foobar2\", \"?\"],\n        [\"f\", \"fo\", \"foo\"],\n    ]\n\n    td.insert(1, [\"lorem\"])\n    assert td[1] == [\"lorem\", \"?\", \"?\"]\n\n    td[0] = [\"lorem\", \"ipsum\"]\n    assert td[0] == [\"lorem\", \"ipsum\", 
\"?\"]\n\n    td[1:2] = [[\"f\", \"fo\"]]\n    assert td[1:2] == [[\"f\", \"fo\", \"?\"]]\n\n    td.add_column(\"col-4\")\n    assert td.keys() == [\"col-1\", \"col-2\", \"col-3\", \"col-4\"]\n    assert td[0][3] == \"?\"\n\n\ndef test_drop():\n    td = TabularData([\"col1\", \"col2\", \"col3\", \"other\"])\n    td.append([\"foo\", \"bar\", \"baz\", \"other_val\"])\n    assert list(td) == [[\"foo\", \"bar\", \"baz\", \"other_val\"]]\n    td.drop(\"col2\")\n    assert td.keys() == [\"col1\", \"col3\", \"other\"]\n    assert list(td) == [[\"foo\", \"baz\", \"other_val\"]]\n\n\ndef test_protected():\n    td = TabularData([\"col1\", \"col2\", \"col3\", \"other\"])\n    td.append([\"foo\", \"bar\", \"baz\", \"other_val\"])\n    td.protect(\"col1\", \"col2\")\n\n    td.drop(\"col1\", \"col2\", \"col3\", \"other\")\n    assert td.keys() == [\"col1\", \"col2\"]\n    assert list(td) == [[\"foo\", \"bar\"]]\n\n    td.unprotect(\"col2\")\n\n    td.drop(\"col1\", \"col2\")\n    assert td.keys() == [\"col1\"]\n    assert list(td) == [[\"foo\"]]\n\n\ndef test_row_from_dict():\n    td = TabularData([\"col1\", \"col2\"])\n    td.row_from_dict({\"col3\": \"value3\", \"col4\": \"value4\"})\n    assert td.keys() == [\"col1\", \"col2\", \"col3\", \"col4\"]\n    assert dict(td.items()) == {\n        \"col1\": [\"\"],\n        \"col2\": [\"\"],\n        \"col3\": [\"value3\"],\n        \"col4\": [\"value4\"],\n    }\n    td.row_from_dict({\"col3\": \"value3\", \"col5\": \"value5\", \"col6\": \"value6\"})\n    assert td.keys() == [\"col1\", \"col2\", \"col3\", \"col4\", \"col5\", \"col6\"]\n    assert dict(td.items()) == {\n        \"col1\": [\"\", \"\"],\n        \"col2\": [\"\", \"\"],\n        \"col3\": [\"value3\", \"value3\"],\n        \"col4\": [\"value4\", \"\"],\n        \"col5\": [\"\", \"value5\"],\n        \"col6\": [\"\", \"value6\"],\n    }\n    assert td.shape == (6, 2)\n    assert list(td) == [\n        [\"\", \"\", \"value3\", \"value4\", \"\", \"\"],\n        [\"\", 
\"\", \"value3\", \"\", \"value5\", \"value6\"],\n    ]\n\n\n@pytest.mark.parametrize(\n    \"axis,how,data,expected\",\n    [\n        (\n            \"rows\",\n            \"any\",\n            [[\"foo\"], [\"foo\", \"bar\"], [\"foo\", \"bar\", \"foobar\"]],\n            [\n                [\"foo\", \"bar\", \"foobar\"],\n            ],\n        ),\n        (\n            \"rows\",\n            \"all\",\n            [[\"foo\"], [\"foo\", \"bar\"], [\"\", \"\", \"\"]],\n            [\n                [\"foo\", \"\", \"\"],\n                [\"foo\", \"bar\", \"\"],\n            ],\n        ),\n        (\n            \"cols\",\n            \"any\",\n            [[\"foo\"], [\"foo\", \"bar\"], [\"foo\", \"bar\", \"foobar\"]],\n            [[\"foo\"], [\"foo\"], [\"foo\"]],\n        ),\n        (\n            \"cols\",\n            \"all\",\n            [[\"foo\"], [\"foo\", \"bar\"], [\"\", \"\", \"\"]],\n            [[\"foo\", \"\"], [\"foo\", \"bar\"], [\"\", \"\"]],\n        ),\n    ],\n)\ndef test_dropna(axis, how, data, expected):\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\"])\n    td.extend(data)\n    td.dropna(axis, how)\n    assert list(td) == expected\n\n\n@pytest.mark.parametrize(\n    \"axis,expected\",\n    [\n        (\"cols\", [[\"foo\", \"\"], [\"foo\", \"\"], [\"foo\", \"foobar\"]]),\n        (\"rows\", [[\"foo\", \"bar\", \"\"], [\"foo\", \"bar\", \"foobar\"]]),\n    ],\n)\ndef test_dropna_subset(axis, expected):\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\"])\n    td.extend([[\"foo\"], [\"foo\", \"bar\"], [\"foo\", \"bar\", \"foobar\"]])\n    td.dropna(axis, subset=[\"col-1\", \"col-2\"])\n    assert list(td) == expected\n\n\n@pytest.mark.parametrize(\n    \"axis,expected,ignore_empty\",\n    [\n        (\n            \"rows\",\n            [\n                [\"foo\", \"-\", \"-\"],\n                [\"foo\", \"foo\", \"-\"],\n                [\"foo\", \"bar\", \"foobar\"],\n            ],\n            True,\n        ),\n        
(\"cols\", [[\"-\"], [\"foo\"], [\"foo\"], [\"bar\"]], True),\n        (\n            \"cols\",\n            [\n                [\"-\", \"-\"],\n                [\"foo\", \"-\"],\n                [\"foo\", \"-\"],\n                [\"bar\", \"foobar\"],\n            ],\n            False,\n        ),\n    ],\n)\ndef test_drop_duplicates(axis, expected, ignore_empty):\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\"], fill_value=\"-\")\n    td.extend([[\"foo\"], [\"foo\", \"foo\"], [\"foo\", \"foo\"], [\"foo\", \"bar\", \"foobar\"]])\n\n    assert list(td) == [\n        [\"foo\", \"-\", \"-\"],\n        [\"foo\", \"foo\", \"-\"],\n        [\"foo\", \"foo\", \"-\"],\n        [\"foo\", \"bar\", \"foobar\"],\n    ]\n\n    td.drop_duplicates(axis, ignore_empty=ignore_empty)\n\n    assert list(td) == expected\n\n\ndef test_drop_duplicates_rich_text():\n    from dvc.ui import ui\n\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\"], fill_value=\"-\")\n\n    td.extend(\n        [\n            [\"foo\", None, ui.rich_text(\"-\")],\n            [\"foo\", \"foo\"],\n            [\"foo\", \"foo\"],\n            [\"foo\", \"bar\", \"foobar\"],\n        ]\n    )\n\n    assert list(td) == [\n        [\"foo\", \"-\", ui.rich_text(\"-\")],\n        [\"foo\", \"foo\", \"-\"],\n        [\"foo\", \"foo\", \"-\"],\n        [\"foo\", \"bar\", \"foobar\"],\n    ]\n\n    td.drop_duplicates(\"cols\")\n\n    assert list(td) == [[\"-\"], [\"foo\"], [\"foo\"], [\"bar\"]]\n\n\n@pytest.mark.parametrize(\n    \"axis,subset,expected\",\n    [\n        (\n            \"rows\",\n            [\"col-1\"],\n            [[\"foo\", \"foo\", \"foo\", \"bar\"]],\n        ),\n        (\n            \"rows\",\n            [\"col-1\", \"col-3\"],\n            [\n                [\"foo\", \"foo\", \"foo\", \"bar\"],\n                [\"foo\", \"bar\", \"foobar\", \"bar\"],\n            ],\n        ),\n        (\n            \"cols\",\n            [\"col-1\", \"col-3\"],\n            [\n              
  [\"foo\", \"foo\", \"bar\"],\n                [\"bar\", \"foo\", \"bar\"],\n                [\"bar\", \"foobar\", \"bar\"],\n            ],\n        ),\n    ],\n)\ndef test_drop_duplicates_subset(axis, subset, expected):\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\", \"col-4\"])\n    td.extend(\n        [\n            [\"foo\", \"foo\", \"foo\", \"bar\"],\n            [\"foo\", \"bar\", \"foo\", \"bar\"],\n            [\"foo\", \"bar\", \"foobar\", \"bar\"],\n        ]\n    )\n    assert list(td) == [\n        [\"foo\", \"foo\", \"foo\", \"bar\"],\n        [\"foo\", \"bar\", \"foo\", \"bar\"],\n        [\"foo\", \"bar\", \"foobar\", \"bar\"],\n    ]\n    td.drop_duplicates(axis, subset=subset)\n    assert list(td) == expected\n\n\ndef test_dropna_invalid_axis():\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\"])\n\n    with pytest.raises(ValueError, match=re.escape(\"Invalid 'axis' value foo.\")):\n        td.dropna(\"foo\")\n\n\ndef test_drop_duplicates_invalid_axis():\n    td = TabularData([\"col-1\", \"col-2\", \"col-3\"])\n\n    with pytest.raises(ValueError, match=re.escape(\"Invalid 'axis' value foo.\")):\n        td.drop_duplicates(\"foo\")\n"
  },
  {
    "path": "tests/unit/test_updater.py",
    "content": "import json\nimport logging\nimport os\nimport time\n\nimport pytest\n\nfrom dvc import __version__\nfrom dvc.updater import Updater\nfrom tests.func.parsing.test_errors import escape_ansi\n\n\n@pytest.fixture\ndef tmp_global_dir(mocker, tmp_path):\n    \"\"\"\n    Fixture to prevent modifying the actual global config\n    \"\"\"\n    mocker.patch(\"dvc.config.Config.get_dir\", return_value=str(tmp_path))\n\n\n@pytest.fixture(autouse=True)\ndef mock_env(monkeypatch):\n    monkeypatch.delenv(\"CI\", raising=False)\n    monkeypatch.setenv(\"DVC_TEST\", \"False\")\n\n\n@pytest.fixture\ndef updater(tmp_path, tmp_global_dir):\n    return Updater(tmp_path)\n\n\n@pytest.fixture\ndef mock_tty(mocker):\n    return mocker.patch(\"sys.stdout.isatty\", return_value=True)\n\n\ndef test_fetch(mocker, updater):\n    mock_get = mocker.patch(\"requests.get\")\n    mock_get.return_value.status_code = 200\n    mock_get.return_value.json.return_value = {\"version\": __version__}\n\n    assert not os.path.exists(updater.updater_file)\n\n    updater.fetch(detach=False)\n\n    mock_get.assert_called_once_with(Updater.URL, timeout=Updater.TIMEOUT_GET)\n    assert os.path.isfile(updater.updater_file)\n\n    with open(updater.updater_file, encoding=\"utf-8\") as fobj:\n        info = json.load(fobj)\n\n    assert info[\"version\"] == __version__\n\n\n@pytest.mark.parametrize(\n    \"config, result\",\n    [\n        ({}, True),\n        ({\"check_update\": \"true\"}, True),\n        ({\"check_update\": \"false\"}, False),\n    ],\n)\ndef test_is_enabled(dvc, updater, config, result):\n    with dvc.config.edit(validate=False) as conf:\n        conf[\"core\"] = config\n\n    assert result == updater.is_enabled()\n\n\n@pytest.mark.parametrize(\"result\", [True, False])\ndef test_check_update_respect_config(result, updater, mocker):\n    mock_check = mocker.patch(\"dvc.updater.Updater._check\")\n    mocker.patch.object(updater, \"is_enabled\", return_value=result)\n    
updater.check()\n    assert result == mock_check.called\n\n\n@pytest.mark.parametrize(\n    \"current,latest,notify\",\n    [\n        (\"0.0.2\", \"0.0.2\", False),\n        (\"0.0.2\", \"0.0.3\", True),\n        (\"0.0.2\", \"0.0.1\", False),\n    ],\n    ids=[\"uptodate\", \"behind\", \"ahead\"],\n)\ndef test_check_updates(mocker, capsys, updater, current, latest, notify):\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n\n    updater.current = current\n    with open(updater.updater_file, \"w+\", encoding=\"utf-8\") as f:\n        json.dump({\"version\": latest}, f)\n\n    updater.check()\n    out, err = capsys.readouterr()\n    expected_message = (\n        f\"You are using dvc version {current}; \"\n        f\"however, version {latest} is available.\\n\"\n        if notify\n        else \"\"\n    )\n\n    assert expected_message in escape_ansi(err)\n    assert not out\n\n\ndef test_check_refetches_each_day(mock_tty, updater, caplog, mocker):\n    updater.current = \"0.0.8\"\n    with open(updater.updater_file, \"w+\", encoding=\"utf-8\") as f:\n        json.dump({\"version\": \"0.0.9\"}, f)\n    fetch = mocker.patch.object(updater, \"fetch\")\n\n    time_value = time.time() + 24 * 60 * 60 + 10\n    mock_time = mocker.patch(\"time.time\", return_value=time_value)\n\n    caplog.clear()\n    with caplog.at_level(logging.INFO, logger=\"dvc.updater\"):\n        updater.check()\n    assert not caplog.text\n    fetch.assert_called_once()\n    mock_time.assert_called()\n\n\ndef test_check_fetches_on_invalid_data_format(mock_tty, updater, caplog, mocker):\n    updater.current = \"0.0.5\"\n    with open(updater.updater_file, \"w+\", encoding=\"utf-8\") as f:\n        f.write('\"{\"version: \"0.0.6\"')\n    fetch = mocker.patch.object(updater, \"fetch\")\n    caplog.clear()\n    with caplog.at_level(logging.INFO, logger=\"dvc.updater\"):\n        updater.check()\n    assert not caplog.text\n    fetch.assert_called_once()\n\n\ndef test_check(mocker, 
updater):\n    mock_check = mocker.patch(\"dvc.updater.Updater._check\")\n    updater.check()\n    updater.check()\n    updater.check()\n\n    assert mock_check.call_count == 3\n\n\n@pytest.mark.parametrize(\n    \"pkg, instruction\",\n    [\n        (\"pip\", \"To upgrade, run 'pip install --upgrade dvc'.\"),\n        (\"rpm\", \"To upgrade, run 'yum update dvc'.\"),\n        (\"brew\", \"To upgrade, run 'brew upgrade dvc'.\"),\n        (\"deb\", \"To upgrade, run 'apt-get install --only-upgrade dvc'.\"),\n        (\"conda\", \"To upgrade, run 'conda update dvc'.\"),\n        (\"choco\", \"To upgrade, run 'choco upgrade dvc'.\"),\n        (\"osxpkg\", \"To upgrade, uninstall dvc and reinstall from https://dvc.org.\"),\n        (\"exe\", \"To upgrade, uninstall dvc and reinstall from https://dvc.org.\"),\n        (\"binary\", \"To upgrade, uninstall dvc and reinstall from https://dvc.org.\"),\n        (\n            None,\n            (\n                \"Find the latest release at \"\n                \"https://github.com/treeverse/dvc/releases/latest.\"\n            ),\n        ),\n        (\n            \"unknown\",\n            (\n                \"Find the latest release at \"\n                \"https://github.com/treeverse/dvc/releases/latest.\"\n            ),\n        ),\n    ],\n)\ndef test_notify_message(updater, pkg, instruction):\n    update_message = (\n        \"You are using dvc version 0.0.2; however, version 0.0.3 is available.\"\n    )\n\n    message = updater._get_message(\"0.0.3\", current=\"0.0.2\", pkg=pkg)\n    assert message.plain.splitlines() == [\"\", update_message, instruction]\n"
  },
  {
    "path": "tests/unit/ui/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/ui/test_console.py",
    "content": "import datetime\nimport textwrap\n\nimport pytest\n\nfrom dvc.ui import Console\n\n\ndef test_write(capsys):\n    \"\"\"Test that ui.write works.\"\"\"\n    console = Console(enable=True)\n    message = \"hello world\"\n    console.write(message)\n    console.error_write(message)\n\n    captured = capsys.readouterr()\n    assert captured.out == f\"{message}\\n\"\n    assert captured.err == f\"{message}\\n\"\n\n\n@pytest.mark.parametrize(\n    \"isatty, expected_output\",\n    [\n        (\n            True,\n            textwrap.dedent(\n                \"\"\"\\\n        {\n          \"hello\": \"world\",\n          \"date\": \"1970-01-01 00:00:00\"\n        }\n    \"\"\"\n            ),\n        ),\n        (\n            False,\n            textwrap.dedent(\n                \"\"\"\\\n        {\"hello\": \"world\", \"date\": \"1970-01-01 00:00:00\"}\n        \"\"\"\n            ),\n        ),\n    ],\n)\ndef test_write_json(capsys, mocker, isatty, expected_output):\n    \"\"\"Test that ui.write_json works.\"\"\"\n\n    console = Console(enable=True)\n    mocker.patch.object(console, \"isatty\", return_value=isatty)\n    message = {\"hello\": \"world\", \"date\": datetime.datetime(1970, 1, 1)}  # noqa: DTZ001\n    console.write_json(message, default=str)\n    captured = capsys.readouterr()\n    assert captured.out == expected_output\n\n\ndef test_capsys_works(capsys):\n    \"\"\"Sanity check that capsys can capture outputs from a global ui.\"\"\"\n    from dvc.ui import ui\n\n    message = \"hello world\"\n    ui.write(message)\n    ui.error_write(message)\n\n    captured = capsys.readouterr()\n    assert captured.out == f\"{message}\\n\"\n    assert captured.err == f\"{message}\\n\"\n"
  },
  {
    "path": "tests/unit/ui/test_pager.py",
    "content": "from pydoc import plainpager\n\nimport pytest\n\nfrom dvc.env import DVC_PAGER\nfrom dvc.ui.pager import DEFAULT_PAGER, LESS, PAGER_ENV, find_pager, make_pager, pager\n\n\n@pytest.fixture(autouse=True)\ndef clear_envs(monkeypatch):\n    monkeypatch.delenv(DVC_PAGER, raising=False)\n    monkeypatch.delenv(PAGER_ENV, raising=False)\n    monkeypatch.delenv(LESS, raising=False)\n\n\ndef test_find_pager_when_not_isatty(mocker):\n    mocker.patch(\"sys.stdout.isatty\", return_value=False)\n    assert find_pager() is None\n\n\ndef test_find_pager_uses_custom_pager_when_dvc_pager_env_var_is_defined(\n    mocker, monkeypatch\n):\n    monkeypatch.setenv(DVC_PAGER, \"my-pager\")\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n\n    assert find_pager() == \"my-pager\"\n\n\ndef test_find_pager_uses_custom_pager_when_pager_env_is_defined(mocker, monkeypatch):\n    monkeypatch.setenv(PAGER_ENV, \"my-pager\")\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n\n    assert find_pager() == \"my-pager\"\n\n\ndef test_find_pager_uses_default_pager_when_found(mocker):\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n    mocker.patch(\"os.system\", return_value=0)\n\n    assert DEFAULT_PAGER in find_pager()\n\n\ndef test_find_pager_fails_to_find_any_pager(mocker):\n    mocker.patch(\"os.system\", return_value=1)\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n\n    assert find_pager() is None\n\n\n@pytest.mark.parametrize(\"env\", [DVC_PAGER, PAGER_ENV, None])\ndef test_dvc_sets_default_options_on_less_without_less_env(mocker, monkeypatch, env):\n    if env:\n        monkeypatch.setenv(env, \"less\")\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n    mocker.patch(\"os.system\", return_value=0)\n\n    assert (\n        find_pager()\n        == \"less --quit-if-one-screen --RAW-CONTROL-CHARS --chop-long-lines --no-init\"\n    )\n\n\n@pytest.mark.parametrize(\"env\", [DVC_PAGER, PAGER_ENV, None])\ndef 
test_dvc_sets_some_options_on_less_if_less_env_defined(mocker, monkeypatch, env):\n    if env:\n        monkeypatch.setenv(env, \"less\")\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n    mocker.patch(\"os.system\", return_value=0)\n    monkeypatch.setenv(LESS, \"-R\")\n\n    assert find_pager() == \"less --RAW-CONTROL-CHARS --chop-long-lines\"\n\n\ndef test_make_pager_when_no_pager_found():\n    assert make_pager(None) is plainpager\n\n\ndef test_pager(mocker, monkeypatch):\n    monkeypatch.setenv(DVC_PAGER, \"my-pager\")\n    mocker.patch(\"sys.stdout.isatty\", return_value=True)\n\n    m_make_pager = mocker.patch(\"dvc.ui.pager.make_pager\")\n    _pager = m_make_pager.return_value = mocker.MagicMock()\n\n    pager(\"hello world\")\n    m_make_pager.assert_called_once_with(\"my-pager\")\n    _pager.assert_called_once_with(\"hello world\")\n"
  },
  {
    "path": "tests/unit/ui/test_table.py",
    "content": "import textwrap\n\nimport pytest\nfrom rich.style import Style\n\nfrom dvc.ui import ui\n\n\ndef test_plain(capsys):\n    ui.table(\n        [(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")],\n        headers=[\"first\", \"second\"],\n    )\n    captured = capsys.readouterr()\n    assert captured.out == textwrap.dedent(\n        \"\"\"\\\n        first    second\n        foo      bar\n        foo1     bar1\n        foo2     bar2\n    \"\"\"\n    )\n\n\ndef test_plain_md(capsys):\n    ui.table(\n        [(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")],\n        headers=[\"first\", \"second\"],\n        markdown=True,\n    )\n    captured = capsys.readouterr()\n    assert captured.out == textwrap.dedent(\n        \"\"\"\\\n        | first   | second   |\n        |---------|----------|\n        | foo     | bar      |\n        | foo1    | bar1     |\n        | foo2    | bar2     |\\n\n    \"\"\"\n    )\n\n\ndef test_plain_pager(mocker):\n    pager_mock = mocker.patch(\"dvc.ui.pager.pager\")\n    ui.table(\n        [(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")],\n        headers=[\"first\", \"second\"],\n        pager=True,\n    )\n\n    pager_mock.assert_called_once_with(\n        textwrap.dedent(\n            \"\"\"\\\n            first    second\n            foo      bar\n            foo1     bar1\n            foo2     bar2\n            \"\"\"\n        )\n    )\n\n\ndef test_plain_headerless(capsys):\n    ui.table([(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")])\n    captured = capsys.readouterr()\n    assert captured.out == textwrap.dedent(\n        \"\"\"\\\n        foo   bar\n        foo1  bar1\n        foo2  bar2\n    \"\"\"\n    )\n\n\ndef test_rich_simple(capsys):\n    ui.table(\n        [(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")],\n        headers=[\"first\", \"second\"],\n        rich_table=True,\n    )\n    # not able to test the actual style for now\n    
captured = capsys.readouterr()\n    assert [row.strip() for row in captured.out.splitlines() if row.strip()] == [\n        \"first  second\",\n        \"foo    bar\",\n        \"foo1   bar1\",\n        \"foo2   bar2\",\n    ]\n\n\ndef test_rich_headerless(capsys):\n    ui.table([(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")], rich_table=True)\n    captured = capsys.readouterr()\n    assert [row.strip() for row in captured.out.splitlines() if row.strip()] == [\n        \"foo   bar\",\n        \"foo1  bar1\",\n        \"foo2  bar2\",\n    ]\n\n\ndef test_rich_border(capsys):\n    ui.table(\n        [(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")],\n        headers=[\"first\", \"second\"],\n        rich_table=True,\n        borders=\"simple\",\n    )\n    captured = capsys.readouterr()\n    assert [row.strip() for row in captured.out.splitlines() if row.strip()] == [\n        \"first   second\",\n        \"────────────────\",\n        \"foo     bar\",\n        \"foo1    bar1\",\n        \"foo2    bar2\",\n    ]\n\n\n@pytest.mark.parametrize(\n    \"extra_opts\",\n    [\n        {\"header_styles\": [{\"style\": Style(bold=True)}]},\n        {\"header_styles\": {\"first\": {\"style\": Style(bold=True)}}},\n        {\"row_styles\": [{\"style\": Style(bold=True)}]},\n    ],\n)\ndef test_rich_styles(capsys, extra_opts):\n    ui.table(\n        [(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), (\"foo2\", \"bar2\")],\n        headers=[\"first\", \"second\"],\n        rich_table=True,\n        **extra_opts,\n    )\n    # not able to test the actual style for now\n    captured = capsys.readouterr()\n    assert [row.strip() for row in captured.out.splitlines() if row.strip()] == [\n        \"first  second\",\n        \"foo    bar\",\n        \"foo1   bar1\",\n        \"foo2   bar2\",\n    ]\n\n\ndef test_rich_pager(mocker):\n    pager_mock = mocker.patch(\"dvc.ui.pager.pager\")\n\n    ui.table(\n        [(\"foo\", \"bar\"), (\"foo1\", \"bar1\"), 
(\"foo2\", \"bar2\")],\n        headers=[\"first\", \"second\"],\n        rich_table=True,\n        pager=True,\n    )\n    received_text = pager_mock.call_args[0][0]\n    assert [row.strip() for row in received_text.splitlines() if row.strip()] == [\n        \"first  second\",\n        \"foo    bar\",\n        \"foo1   bar1\",\n        \"foo2   bar2\",\n    ]\n\n\n@pytest.mark.parametrize(\"rich_table\", [True, False])\ndef test_empty(capsys, rich_table):\n    ui.table([], rich_table=rich_table)\n    out, err = capsys.readouterr()\n    assert (out, err) == (\"\", \"\")\n\n\ndef test_empty_markdown(capsys):\n    ui.table([], headers=[\"Col1\", \"Col2\"], markdown=True)\n    out, err = capsys.readouterr()\n    assert (out, err) == (\"| Col1   | Col2   |\\n|--------|--------|\\n\\n\", \"\")\n"
  },
  {
    "path": "tests/unit/utils/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/utils/serialize/__init__.py",
    "content": ""
  },
  {
    "path": "tests/unit/utils/serialize/test_python.py",
    "content": "import pytest\n\nfrom dvc.utils.serialize import parse_py\n\n\n@pytest.mark.parametrize(\n    \"text,result\",\n    [\n        (\"BOOL = True\", {\"BOOL\": True}),\n        (\"INT = 5\", {\"INT\": 5}),\n        (\"FLOAT = 0.001\", {\"FLOAT\": 0.001}),\n        (\"STR = 'abc'\", {\"STR\": \"abc\"}),\n        (\"DICT = {'a': 1, 'b': 2}\", {\"DICT\": {\"a\": 1, \"b\": 2}}),\n        (\"LIST = [1, 2, 3]\", {\"LIST\": [1, 2, 3]}),\n        (\"SET = {1, 2, 3}\", {\"SET\": {1, 2, 3}}),\n        (\"TUPLE = (10, 100)\", {\"TUPLE\": (10, 100)}),\n        (\"NONE = None\", {\"NONE\": None}),\n        (\"UNARY_OP = -1\", {\"UNARY_OP\": -1}),\n        (\n            \"\"\"class TrainConfig:\n\n            EPOCHS = 70\n\n            def __init__(self):\n                self.layers = 5\n                self.layers = 9  # TrainConfig.layers param will be 9\n                bar = 3  # Will NOT be found since it's locally scoped\n            \"\"\",\n            {\"TrainConfig\": {\"EPOCHS\": 70, \"layers\": 9}},\n        ),\n    ],\n)\ndef test_parse_valid_types(text, result):\n    assert parse_py(text, \"foo\") == result\n\n\n@pytest.mark.parametrize(\n    \"text\",\n    [\n        \"CONSTRUCTOR = dict(a=1, b=2)\",\n        \"SUM = 1 + 2\",\n    ],\n)\ndef test_parse_invalid_types(text):\n    assert parse_py(text, \"foo\") == {}\n"
  },
  {
    "path": "tests/unit/utils/serialize/test_toml.py",
    "content": "def test_preserve_comments(tmp_dir):\n    from dvc.utils.serialize._toml import modify_toml\n\n    contents_fmt = \"\"\"\\\n#A Title\n[foo]\nbar = {} # meaning of life\nbaz = [1, 2]\n\"\"\"\n    tmp_dir.gen(\"params.toml\", contents_fmt.format(\"42\"))\n\n    with modify_toml(\"params.toml\") as d:\n        d[\"foo\"][\"bar\"] //= 2\n    assert (tmp_dir / \"params.toml\").read_text() == contents_fmt.format(\"21\")\n\n\ndef test_parse_toml_type():\n    from tomlkit.toml_document import TOMLDocument\n\n    from dvc.utils.serialize._toml import parse_toml\n\n    contents = \"# A Title [foo]\\nbar = 42# meaning of life\\nbaz = [1, 2]\\n\"\n\n    parsed = parse_toml(contents, \".\")\n    assert not isinstance(parsed, TOMLDocument)\n    assert isinstance(parsed, dict)\n\n\ndef test_parse_toml_for_update():\n    from tomlkit.toml_document import TOMLDocument\n\n    from dvc.utils.serialize._toml import parse_toml_for_update\n\n    contents = \"# A Title [foo]\\nbar = 42# meaning of life\\nbaz = [1, 2]\\n\"\n\n    parsed = parse_toml_for_update(contents, \".\")\n    assert isinstance(parsed, TOMLDocument)\n    assert isinstance(parsed, dict)\n"
  },
  {
    "path": "tests/unit/utils/serialize/test_yaml.py",
    "content": "import pytest\n\nfrom dvc.utils.serialize import (\n    EncodingError,\n    YAMLFileCorruptedError,\n    load_yaml,\n    parse_yaml,\n)\n\n\ndef test_parse_yaml_duplicate_key_error():\n    text = \"\"\"\\\n    mykey:\n    - foo\n    mykey:\n    - bar\n    \"\"\"\n    with pytest.raises(YAMLFileCorruptedError):\n        parse_yaml(text, \"mypath\")\n\n\ndef test_parse_yaml_invalid_unicode(tmp_dir):\n    filename = \"invalid_utf8.yaml\"\n    tmp_dir.gen(filename, b\"\\x80some: stuff\")\n\n    with pytest.raises(EncodingError) as excinfo:\n        load_yaml(tmp_dir / filename)\n\n    assert filename in excinfo.value.path\n    assert excinfo.value.encoding == \"utf-8\"\n"
  },
  {
    "path": "tests/unit/utils/test_cli_parse.py",
    "content": "import pytest\n\nfrom dvc.utils.cli_parse import parse_params, to_path_overrides\n\n\ndef test_parse_params():\n    assert parse_params(\n        [\n            \"param1\",\n            \"file1:param1,param2\",\n            \"file2:param2\",\n            \"file1:param3,param4,\",\n            \"param2,param10\",\n            \"param3,\",\n            \"file3:\",\n        ]\n    ) == [\n        {\"params.yaml\": [\"param1\", \"param2\", \"param10\", \"param3\"]},\n        {\"file1\": [\"param1\", \"param2\", \"param3\", \"param4\"]},\n        {\"file2\": [\"param2\"]},\n        {\"file3\": []},\n    ]\n\n\n@pytest.mark.parametrize(\n    \"params,expected\",\n    [\n        ([\"foo=1\"], {\"params.yaml\": [\"foo=1\"]}),\n        ([\"foo={bar: 1}\"], {\"params.yaml\": [\"foo={bar: 1}\"]}),\n        ([\"foo.0=bar\"], {\"params.yaml\": [\"foo.0=bar\"]}),\n        ([\"params.json:foo={bar: 1}\"], {\"params.json\": [\"foo={bar: 1}\"]}),\n        (\n            [\"params.json:foo={bar: 1}\", \"baz=2\", \"goo=3\"],\n            {\n                \"params.json\": [\"foo={bar: 1}\"],\n                \"params.yaml\": [\"baz=2\", \"goo=3\"],\n            },\n        ),\n    ],\n)\ndef test_to_path_overrides(params, expected):\n    assert to_path_overrides(params) == expected\n"
  },
  {
    "path": "tests/unit/utils/test_collections.py",
    "content": "import json\n\nimport pytest\n\nfrom dvc.utils.collections import (\n    apply_diff,\n    merge_dicts,\n    remove_missing_keys,\n    to_omegaconf,\n)\nfrom dvc.utils.serialize import dumps_yaml\n\n\nclass MyDict(dict):\n    pass\n\n\nclass MyInt(int):\n    pass\n\n\ndef test_apply_diff_is_inplace():\n    dest = MyDict()\n    dest.attr = 42\n    apply_diff({}, dest)\n\n    assert type(dest) is MyDict, \"Preserves class\"\n    assert dest.attr == 42, \"Preserves custom attrs\"\n\n\ndef test_apply_diff_mapping():\n    src = {\"a\": 1}\n    dest = {\"b\": 2}\n    apply_diff(src, dest)\n    assert dest == src, \"Adds and removes keys\"\n\n    src = {\"a\": 1}\n    dest = {\"a\": MyInt(1)}\n    apply_diff(src, dest)\n    assert type(dest[\"a\"]) is MyInt, \"Does not replace equals\"\n\n    src = {\"d\": {\"a\": 1}}\n    inner = {}\n    dest = {\"d\": inner}\n    apply_diff(src, dest)\n    assert dest[\"d\"] is inner, \"Updates inner dicts\"\n\n\ndef test_apply_diff_seq():\n    src = [1]\n    dest = [MyInt(1)]\n    apply_diff(src, dest)\n    assert type(dest[0]) is MyInt, \"Does not replace equals\"\n\n    src = {\"l\": [1]}\n    inner = []\n    dest = {\"l\": inner}\n    apply_diff(src, dest)\n    assert dest[\"l\"] is inner, \"Updates inner lists\"\n\n\ndef is_serializable(d):\n    json.dumps(d)\n    dumps_yaml(d)\n    return True\n\n\ndef test_to_omegaconf():\n    class CustomDict(dict):\n        pass\n\n    class CustomList(list):\n        pass\n\n    data = {\n        \"foo\": CustomDict(bar=1, bag=CustomList([1, 2])),\n        \"goo\": CustomList([CustomDict(goobar=1)]),\n    }\n    new_data = to_omegaconf(data)\n    assert not isinstance(new_data[\"foo\"], CustomDict)\n    assert not isinstance(new_data[\"foo\"][\"bag\"], CustomList)\n    assert not isinstance(new_data[\"goo\"], CustomList)\n    assert not isinstance(new_data[\"goo\"][0], CustomDict)\n\n\n@pytest.mark.parametrize(\n    \"changes, expected\",\n    [\n        ({\"foo\": \"baz\"}, 
{\"foo\": \"baz\", \"goo\": {\"bag\": 3}, \"lorem\": False}),\n        (\n            {\"foo\": \"baz\", \"goo\": \"bar\"},\n            {\"foo\": \"baz\", \"goo\": \"bar\", \"lorem\": False},\n        ),\n        (\n            {\"goo\": {\"bag\": 4}},\n            {\"foo\": {\"bar\": 1, \"baz\": 2}, \"goo\": {\"bag\": 4}, \"lorem\": False},\n        ),\n        (\n            {\"foo\": {\"bar\": 1, \"baz\": 2, 0: \"bar\"}},\n            {\n                \"foo\": {\"bar\": 1, \"baz\": 2, 0: \"bar\"},\n                \"goo\": {\"bag\": 3},\n                \"lorem\": False,\n            },\n        ),\n        (\n            {\"lorem\": {\"ipsum\": 3}},\n            {\n                \"foo\": {\"bar\": 1, \"baz\": 2},\n                \"goo\": {\"bag\": 3},\n                \"lorem\": {\"ipsum\": 3},\n            },\n        ),\n        ({}, {\"foo\": {\"bar\": 1, \"baz\": 2}, \"goo\": {\"bag\": 3}, \"lorem\": False}),\n    ],\n)\ndef test_merge_dicts(changes, expected):\n    params = {\"foo\": {\"bar\": 1, \"baz\": 2}, \"goo\": {\"bag\": 3}, \"lorem\": False}\n    merged = merge_dicts(params, changes)\n    assert merged == expected == params\n    assert params is merged  # references should be preserved\n    assert is_serializable(params)\n\n\n@pytest.mark.parametrize(\n    \"changes, expected\",\n    [\n        ({\"foo\": \"baz\"}, {\"foo\": {\"baz\": 2}}),\n        (\n            {\"foo\": \"baz\", \"goo\": \"bag\"},\n            {\"foo\": {\"baz\": 2}, \"goo\": {\"bag\": 3}},\n        ),\n        ({}, {}),\n    ],\n)\ndef test_remove_missing_keys(changes, expected):\n    params = {\"foo\": {\"bar\": 1, \"baz\": 2}, \"goo\": {\"bag\": 3}, \"lorem\": False}\n    removed = remove_missing_keys(params, changes)\n    assert removed == expected == params\n    assert params is removed  # references should be preserved\n    assert is_serializable(params)\n"
  },
  {
    "path": "tests/unit/utils/test_executors.py",
    "content": "import operator\nimport time\n\nimport pytest\nfrom funcy import raiser\n\nfrom dvc.utils.threadpool import ThreadPoolExecutor\n\n\n@pytest.mark.parametrize(\"wait\", [True, False])\n@pytest.mark.parametrize(\"cancel_futures\", [True, False])\ndef test_cancel_futures(wait, cancel_futures):\n    \"\"\"Modified from\n    https://github.com/python/cpython/blob/4d2403f/Lib/test/test_concurrent_futures.py#L354\n    \"\"\"\n    executor = ThreadPoolExecutor(max_workers=2)\n    fs = [executor.submit(time.sleep, 0.1) for _ in range(50)]\n    executor.shutdown(wait=wait, cancel_futures=cancel_futures)\n\n    if not wait:\n        for t in executor._threads:\n            t.join()\n\n    cancelled = [fut for fut in fs if fut.cancelled()]\n    # Use \"not fut.cancelled()\" instead of \"fut.done()\" to include futures\n    # that may have been left in a pending state.\n    others = [fut for fut in fs if not fut.cancelled()]\n\n    # Ensure the other futures were able to finish.\n    for fut in others:\n        assert fut.done()\n        assert fut.exception() is None\n\n    if not cancel_futures:\n        # there should be no cancelled futures\n        assert len(cancelled) == 0\n        assert len(others) == len(fs)\n    else:\n        # We can't guarantee the exact number of cancellations, but we can\n        # guarantee that *some* were cancelled. With few workers, many of\n        # the submitted futures should have been cancelled.\n        assert len(cancelled) > 20\n        # Similar to the number of cancelled futures, we can't guarantee the\n        # exact number that completed. 
But, we can guarantee that at least\n        # one finished.\n        assert len(others) > 0\n\n\ndef test_cancel_on_error_context_manager(mocker):\n    executor = ThreadPoolExecutor(max_workers=2, cancel_on_error=True)\n    spy = mocker.spy(executor, \"shutdown\")\n    with pytest.raises(RuntimeError), executor:  # noqa: PT012\n        future1 = executor.submit(operator.mul, 2, 21)\n        future2 = executor.submit(time.sleep, 0.1)\n        future3 = executor.submit(raiser(RuntimeError), \"This is an error\")\n        fs = [executor.submit(time.sleep, 0.1) for _ in range(50)]\n\n        assert future1.result() == 42\n        assert future2.result() is None\n        _ = future3.result()\n\n    spy.assert_called_once_with(wait=True, cancel_futures=True)\n\n    cancelled = [fut for fut in fs if fut.cancelled()]\n    others = [fut for fut in fs if not fut.cancelled()]\n\n    for fut in others:\n        assert fut.done()\n        assert fut.exception() is None\n\n    assert len(cancelled) > 20\n    assert len(others) > 0\n"
  },
  {
    "path": "tests/unit/utils/test_fs.py",
    "content": "import os\n\nimport pytest\n\nimport dvc\nfrom dvc.fs import system\nfrom dvc.utils import relpath\nfrom dvc.utils.fs import (\n    BasePathNotInCheckedPathException,\n    contains_symlink_up_to,\n    path_isin,\n    remove,\n)\n\n\ndef test_should_raise_exception_on_base_path_not_in_path():\n    with pytest.raises(BasePathNotInCheckedPathException):\n        contains_symlink_up_to(os.path.join(\"foo\", \"path\"), \"bar\")\n\n\ndef test_should_return_true_on_symlink_in_path(mocker):\n    mocker.patch.object(system, \"is_symlink\", return_value=True)\n    base_path = \"foo\"\n    path = os.path.join(base_path, \"bar\")\n    assert contains_symlink_up_to(path, base_path)\n\n\ndef test_should_return_false_on_path_eq_to_base_path(mocker):\n    mocker.patch.object(system, \"is_symlink\", return_value=False)\n    path = \"path\"\n    assert not contains_symlink_up_to(path, path)\n\n\ndef test_should_return_false_on_no_more_dirs_below_path(mocker):\n    mocker.patch.object(system, \"is_symlink\", return_value=False)\n    dirname_patch = mocker.patch.object(os.path, \"dirname\", side_effect=lambda arg: arg)\n    assert not contains_symlink_up_to(os.path.join(\"foo\", \"path\"), \"foo\")\n    dirname_patch.assert_called_once()\n\n\ndef test_should_return_false_when_base_path_is_symlink(mocker):\n    base_path = \"foo\"\n    target_path = os.path.join(base_path, \"bar\")\n\n    def base_path_is_symlink(path):\n        return path == base_path\n\n    mocker.patch.object(\n        system,\n        \"is_symlink\",\n        return_value=True,\n        side_effect=base_path_is_symlink,\n    )\n    assert not contains_symlink_up_to(target_path, base_path)\n\n\ndef test_path_object_and_str_are_valid_arg_types():\n    base_path = \"foo\"\n    target_path = os.path.join(base_path, \"bar\")\n    assert not contains_symlink_up_to(target_path, base_path)\n    assert not contains_symlink_up_to(target_path, base_path)\n\n\ndef 
test_should_call_recursive_on_no_condition_matched(mocker):\n    mocker.patch.object(system, \"is_symlink\", return_value=False)\n\n    contains_symlink_spy = mocker.spy(dvc.utils.fs, \"contains_symlink_up_to\")\n\n    # call from full path to match contains_symlink_spy patch path\n    assert not dvc.utils.fs.contains_symlink_up_to(os.path.join(\"foo\", \"path\"), \"foo\")\n    assert contains_symlink_spy.mock.call_count == 2\n\n\n@pytest.mark.skipif(os.name != \"nt\", reason=\"Windows specific\")\ndef test_relpath_windows_different_drives():\n    path1 = os.path.join(\"A:\", os.sep, \"some\", \"path\")\n    path2 = os.path.join(\"B:\", os.sep, \"other\", \"path\")\n    assert relpath(path1, path2) == path1\n\n    rel = relpath(path1, path2)\n    assert isinstance(rel, str)\n    assert rel == path1\n\n\ndef test_remove(tmp_dir):\n    tmp_dir.gen({\"foo\": \"foo content\"})\n    path = \"foo\"\n\n    remove(path)\n    assert not os.path.isfile(path)\n\n\ndef test_path_isin_positive():\n    child = os.path.join(\"path\", \"to\", \"folder\")\n\n    assert path_isin(child, os.path.join(\"path\", \"to\", \"\"))\n    assert path_isin(child, os.path.join(\"path\", \"to\"))\n    assert path_isin(child, os.path.join(\"path\", \"\"))\n    assert path_isin(child, os.path.join(\"path\"))\n\n\ndef test_path_isin_on_same_path():\n    path = os.path.join(\"path\", \"to\", \"folder\")\n    path_with_sep = os.path.join(path, \"\")\n\n    assert not path_isin(path, path)\n    assert not path_isin(path, path_with_sep)\n    assert not path_isin(path_with_sep, path)\n    assert not path_isin(path_with_sep, path_with_sep)\n\n\ndef test_path_isin_on_common_substring_path():\n    path1 = os.path.join(\"path\", \"to\", \"folder1\")\n    path2 = os.path.join(\"path\", \"to\", \"folder\")\n\n    assert not path_isin(path1, path2)\n\n\ndef test_path_isin_with_absolute_path():\n    parent = os.path.abspath(\"path\")\n    child = os.path.join(parent, \"to\", \"folder\")\n\n    assert 
path_isin(child, parent)\n\n\ndef test_path_isin_case_sensitive():\n    child = os.path.join(\"path\", \"to\", \"folder\")\n    parent = os.path.join(\"PATH\", \"TO\")\n\n    assert path_isin(child, parent) == (os.name == \"nt\")\n\n\n@pytest.mark.skipif(os.name != \"nt\", reason=\"Windows specific\")\ndef test_contains_symlink_case_sensitive_win():\n    child = os.path.join(\"path\", \"to\", \"folder\")\n    parent = os.path.join(\"PATH\", \"TO\")\n    assert contains_symlink_up_to(child, parent) is False\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"Posix specific\")\ndef test_contains_symlink_case_sensitive_posix():\n    child = os.path.join(\"path\", \"to\", \"folder\")\n    parent = os.path.join(\"PATH\", \"TO\")\n    with pytest.raises(BasePathNotInCheckedPathException):\n        contains_symlink_up_to(child, parent)\n\n\ndef test_makedirs(tmp_dir):\n    path = os.path.join(tmp_dir, \"directory\")\n\n    os.makedirs(path)\n    assert os.path.isdir(path)\n"
  },
  {
    "path": "tests/unit/utils/test_humanize.py",
    "content": "from collections import OrderedDict\n\nimport pytest\n\nfrom dvc.utils.humanize import get_summary, truncate_text\n\n\ndef test_get_summary():\n    # dict, so that we could delete from it easily\n    stats = OrderedDict(\n        [\n            (\"fetched\", 3),\n            (\"added\", [\"file1\", \"file2\", \"file3\"]),\n            (\"deleted\", [\"file4\", \"file5\"]),\n            (\"modified\", [\"file6\", \"file7\"]),\n        ]\n    )\n\n    assert (\n        get_summary(stats.items())\n        == \"3 files fetched, 3 files added, 2 files deleted and 2 files modified\"\n    )\n\n    del stats[\"fetched\"]\n    del stats[\"deleted\"][1]\n    assert (\n        get_summary(stats.items())\n        == \"3 files added, 1 file deleted and 2 files modified\"\n    )\n\n    del stats[\"deleted\"][0]\n    assert get_summary(stats.items()) == \"3 files added and 2 files modified\"\n\n    del stats[\"modified\"]\n    assert get_summary(stats.items()) == \"3 files added\"\n\n    assert not get_summary([])\n    assert not get_summary([(\"x\", 0), (\"y\", [])])\n    assert get_summary([(\"x\", 1), (\"y\", [])]) == \"1 file x\"\n\n\ndef test_truncate_text():\n    text = \"lorem ipsum\"\n    length = 5\n\n    truncated = truncate_text(text, length)\n    # length should not cross the max length\n    assert len(truncated) == length\n    assert truncated[:-1] == text[: length - 1]\n    # last character should be ellipsis\n    assert truncated[-1] == \"…\"\n\n    truncated = truncate_text(text, length, with_ellipsis=False)\n    # length should not cross the max length\n    assert len(truncated) == length\n    assert truncated == text[:length]\n\n\n@pytest.mark.parametrize(\"with_ellipsis\", [True, False])\ndef test_truncate_text_smaller_than_max_length(with_ellipsis):\n    text = \"lorem ipsum\"\n\n    # exact match as length\n    truncated = truncate_text(text, len(text), with_ellipsis=with_ellipsis)\n    assert len(truncated) == len(text)\n    assert truncated 
== text\n\n    # max_length > len(text)\n    truncated = truncate_text(text, len(text) + 1, with_ellipsis=with_ellipsis)\n    assert len(truncated) == len(text)\n    assert truncated == text\n"
  },
  {
    "path": "tests/unit/utils/test_plots.py",
    "content": "from dvc.utils.plots import get_plot_id, group_definitions_by_id\n\n\ndef test_get_plot_id():\n    assert get_plot_id(\"plot_id\", \"config_path\") == \"config_path::plot_id\"\n    assert get_plot_id(\"plot_id\", \"\") == \"plot_id\"\n\n\ndef test_group_definitions_by_id():\n    definitions = {\n        \"config1\": {\"data\": {\"plot1\": \"definition1\", \"plot2\": \"definition2\"}},\n        \"config2\": {\"data\": {\"plot1\": \"definition1\"}},\n    }\n    assert group_definitions_by_id(definitions) == {\n        \"config1::plot1\": (\"plot1\", \"definition1\"),\n        \"config2::plot1\": (\"plot1\", \"definition1\"),\n        \"plot2\": (\"plot2\", \"definition2\"),\n    }\n"
  },
  {
    "path": "tests/unit/utils/test_studio.py",
    "content": "from urllib.parse import urljoin\n\nimport pytest\nfrom dulwich.porcelain import remote_add as git_remote_add\nfrom requests import Response\n\nfrom dvc.env import (\n    DVC_EXP_GIT_REMOTE,\n    DVC_STUDIO_OFFLINE,\n    DVC_STUDIO_REPO_URL,\n    DVC_STUDIO_TOKEN,\n    DVC_STUDIO_URL,\n)\nfrom dvc.utils.studio import (\n    STUDIO_URL,\n    config_to_env,\n    env_to_config,\n    get_repo_url,\n    notify_refs,\n)\n\nCONFIG = {\"offline\": True, \"repo_url\": \"repo_url\", \"token\": \"token\", \"url\": \"url\"}\n\n\nENV = {\n    DVC_STUDIO_OFFLINE: True,\n    DVC_STUDIO_REPO_URL: \"repo_url\",\n    DVC_STUDIO_TOKEN: \"token\",\n    DVC_STUDIO_URL: \"url\",\n}\n\n\n@pytest.mark.parametrize(\n    \"status_code, side_effect\",\n    [\n        (200, {}),  # success\n        (401, {\"detail\": \"unauthorized\"}),  # should not fail on client errors\n        (500, ValueError),  # should not fail even on server errors\n    ],\n)\ndef test_notify_refs(mocker, status_code, side_effect):\n    response = Response()\n    response.status_code = status_code\n    mocker.patch.object(response, \"json\", side_effect=[side_effect])\n\n    mock_post = mocker.patch(\"requests.Session.post\", return_value=response)\n\n    notify_refs(\n        \"git@github.com:treeverse/dvc.git\",\n        \"TOKEN\",\n        pushed=[\"p1\", \"p2\"],\n        removed=[\"r1\", \"r2\"],\n    )\n\n    assert mock_post.called\n    assert mock_post.call_args == mocker.call(\n        urljoin(STUDIO_URL, \"/webhook/dvc\"),\n        json={\n            \"repo_url\": \"git@github.com:treeverse/dvc.git\",\n            \"client\": \"dvc\",\n            \"refs\": {\n                \"pushed\": [\"p1\", \"p2\"],\n                \"removed\": [\"r1\", \"r2\"],\n            },\n        },\n        headers={\"Authorization\": \"token TOKEN\"},\n        timeout=5,\n        allow_redirects=False,\n    )\n\n\ndef test_config_to_env():\n    assert config_to_env(CONFIG) == ENV\n\n\ndef 
test_env_to_config():\n    assert env_to_config(ENV) == CONFIG\n\n\n@pytest.mark.parametrize(\n    \"exp_git_remote, repo_url\",\n    [\n        (None, None),\n        (\"origin\", \"git@url\"),\n        (\"http://url\", \"http://url\"),\n    ],\n)\ndef test_get_repo_url(dvc, scm, monkeypatch, exp_git_remote, repo_url):\n    git_remote_add(scm.root_dir, \"origin\", \"git@url\")\n\n    if exp_git_remote:\n        monkeypatch.setenv(DVC_EXP_GIT_REMOTE, exp_git_remote)\n    assert get_repo_url(dvc) == repo_url\n"
  },
  {
    "path": "tests/unit/utils/test_utils.py",
    "content": "import os\nimport re\n\nimport pytest\n\nfrom dvc.utils import dict_sha256, fix_env, parse_target, relpath, resolve_output\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"pyenv-win is not supported\")\n@pytest.mark.parametrize(\n    \"path, orig\",\n    [\n        (\n            \"/pyenv/bin:/pyenv/libexec:/pyenv/plugins/plugin:/orig/path1:/orig/path2\",\n            \"/orig/path1:/orig/path2\",\n        ),\n        (\n            \"/pyenv/bin:/pyenv/libexec:/orig/path1:/orig/path2\",\n            \"/orig/path1:/orig/path2\",\n        ),\n        (\n            \"/pyenv/bin:/some/libexec:/pyenv/plugins/plugin:/orig/path1:/orig/path2\",\n            \"/orig/path1:/orig/path2\",\n        ),\n        (\"/orig/path1:/orig/path2\", \"/orig/path1:/orig/path2\"),\n        (\n            \"/orig/path1:/orig/path2:/pyenv/bin:/pyenv/libexec\",\n            \"/orig/path1:/orig/path2:/pyenv/bin:/pyenv/libexec\",\n        ),\n    ],\n)\ndef test_fix_env_pyenv(path, orig):\n    env = {\n        \"PATH\": path,\n        \"PYENV_ROOT\": \"/pyenv\",\n        \"PYENV_VERSION\": \"3.7.2\",\n        \"PYENV_DIR\": \"/some/dir\",\n        \"PYENV_HOOK_PATH\": \"/some/hook/path\",\n    }\n    assert fix_env(env)[\"PATH\"] == orig\n\n\n@pytest.mark.skipif(os.name != \"nt\", reason=\"Windows specific\")\ndef test_relpath_windows():\n    assert relpath(\"x:\\\\dir1\\\\dir2\\\\file.txt\", \"x:\\\\dir1\") == \"dir2\\\\file.txt\"\n\n    assert (\n        relpath(\"y:\\\\dir1\\\\dir2\\\\file.txt\", \"\\\\\\\\server\\\\share\\\\dir1\")\n        == \"y:\\\\dir1\\\\dir2\\\\file.txt\"\n    )\n\n\n@pytest.mark.parametrize(\n    \"inp,out,is_dir,expected\",\n    [\n        (\"target\", None, False, \"target\"),\n        (\"target\", \"dir\", True, os.path.join(\"dir\", \"target\")),\n        (\"target\", \"file_target\", False, \"file_target\"),\n        (\n            \"target\",\n            os.path.join(\"dir\", \"subdir\"),\n            True,\n            
os.path.join(\"dir\", \"subdir\", \"target\"),\n        ),\n        (\"dir/\", None, False, \"dir\"),\n        (\"dir\", None, False, \"dir\"),\n        (\"dir\", \"other_dir\", False, \"other_dir\"),\n        (\"dir\", \"other_dir\", True, os.path.join(\"other_dir\", \"dir\")),\n    ],\n)\ndef test_resolve_output(inp, out, is_dir, expected, mocker):\n    mocker.patch(\"os.path.isdir\", return_value=is_dir)\n    result = resolve_output(inp, out)\n    assert result == expected\n\n\n@pytest.mark.parametrize(\n    \"inp,out, default\",\n    [\n        (\"dvc.yaml\", (\"dvc.yaml\", None), None),\n        (\"dvc.yaml:name\", (\"dvc.yaml\", \"name\"), None),\n        (\":name\", (\"dvc.yaml\", \"name\"), None),\n        (\"stage.dvc\", (\"stage.dvc\", None), None),\n        (\"../models/stage.dvc\", (\"../models/stage.dvc\", None), \"def\"),\n        (\":name\", (\"default\", \"name\"), \"default\"),\n        (\"something.dvc:name\", (\"something.dvc\", \"name\"), None),\n        (\"../something.dvc:name\", (\"../something.dvc\", \"name\"), None),\n        (\"file\", (None, \"file\"), None),\n        (\"build@15\", (None, \"build@15\"), None),\n        (\"build@{'level': 35}\", (None, \"build@{'level': 35}\"), None),\n        (\":build@15\", (\"dvc.yaml\", \"build@15\"), None),\n        (\":build@{'level': 35}\", (\"dvc.yaml\", \"build@{'level': 35}\"), None),\n        (\"dvc.yaml:build@15\", (\"dvc.yaml\", \"build@15\"), None),\n        (\n            \"dvc.yaml:build@{'level': 35}\",\n            (\"dvc.yaml\", \"build@{'level': 35}\"),\n            None,\n        ),\n        (\n            \"build2@{'level': [1, 2, 3]}\",\n            (None, \"build2@{'level': [1, 2, 3]}\"),\n            None,\n        ),\n        (\n            \":build2@{'level': [1, 2, 3]}\",\n            (\"dvc.yaml\", \"build2@{'level': [1, 2, 3]}\"),\n            None,\n        ),\n        (\n            \"dvc.yaml:build2@{'level': [1, 2, 3]}\",\n            (\"dvc.yaml\", \"build2@{'level': [1, 
2, 3]}\"),\n            None,\n        ),\n    ],\n)\ndef test_parse_target(inp, out, default):\n    assert parse_target(inp, default) == out\n\n\ndef test_hint_on_lockfile():\n    with pytest.raises(\n        Exception, match=re.escape(\"Did you mean: `dvc.yaml:name`?\")\n    ) as e:\n        assert parse_target(\"dvc.lock:name\")\n    assert \"dvc.yaml:name\" in str(e.value)\n\n\n@pytest.mark.parametrize(\n    \"d,sha\",\n    [\n        (\n            {\n                \"cmd\": \"echo content > out\",\n                \"deps\": {\"dep\": \"2254342becceafbd04538e0a38696791\"},\n                \"outs\": {\"out\": \"f75b8179e4bbe7e2b4a074dcef62de95\"},\n            },\n            \"f472eda60f09660a4750e8b3208cf90b3a3b24e5f42e0371d829710e9464d74a\",\n        ),\n        (\n            {\n                \"cmd\": \"echo content > out\",\n                \"deps\": {\"dep\": \"2254342becceafbd04538e0a38696791\"},\n                \"outs\": [\"out\"],\n            },\n            \"a239b67073bd58affcdb81fff3305d1726c6e7f9c86f3d4fca0e92e8147dc7b0\",\n        ),\n    ],\n)\ndef test_dict_sha256(d, sha):\n    assert dict_sha256(d) == sha\n"
  },
  {
    "path": "tests/utils/__init__.py",
    "content": "import csv\nimport os\nfrom contextlib import contextmanager\n\nimport pytest\nfrom funcy import first\n\nfrom dvc.scm import Git\n\n# rewrite assertions in assert, pytest does not rewrite for other modules\n# than tests itself.\npytest.register_assert_rewrite(\"tests.utils.asserts\")\n\n\ndef get_gitignore_content():\n    with open(Git.GITIGNORE, encoding=\"utf-8\") as gitignore:\n        return gitignore.read().splitlines()\n\n\n@contextmanager\ndef cd(newdir):\n    prevdir = os.getcwd()\n    os.chdir(os.path.expanduser(newdir))\n    try:\n        yield\n    finally:\n        os.chdir(prevdir)\n\n\ndef to_posixpath(path):\n    return path.replace(\"\\\\\", \"/\")\n\n\ndef dump_sv(stream, metrics, delimiter=\",\", header=True):\n    if header:\n        writer = csv.DictWriter(\n            stream, fieldnames=list(first(metrics).keys()), delimiter=delimiter\n        )\n        writer.writeheader()\n        writer.writerows(metrics)\n    else:\n        writer = csv.writer(stream)\n        for d in metrics:\n            writer.writerow(list(d.values()))\n\n\n@contextmanager\ndef console_width(console, width):\n    console_options = console.options\n    original = console_options.max_width\n    con_width = console._width\n\n    try:\n        console_options.max_width = width\n        console._width = width\n        yield\n    finally:\n        console_options.max_width = original\n        console._width = con_width\n\n\nclass ANY:  # noqa: PLW1641\n    def __init__(self, expected_type):\n        self.expected_type = expected_type\n\n    def __repr__(self):\n        return \"Any\" + self.expected_type.__name__.capitalize()\n\n    def __eq__(self, other):\n        return isinstance(other, self.expected_type)\n"
  },
  {
    "path": "tests/utils/asserts.py",
    "content": "from typing import TYPE_CHECKING, Any\nfrom unittest.mock import ANY\n\nif TYPE_CHECKING:\n    from unittest.mock import Mock\n\n\ndef issubset(subset: dict, superset: dict) -> bool:\n    assert superset == superset | subset\n    return True\n\n\ndef called_once_with_subset(m: \"Mock\", *args: Any, **kwargs: Any) -> bool:\n    m.assert_called_once()\n    m_args, m_kwargs = m.call_args\n\n    expected_args = m_args + (ANY,) * (len(m_args) - len(args) - 1)\n    expected_kwargs = {k: kwargs.get(k, ANY) for k in m_kwargs}\n    m.assert_called_with(*expected_args, **expected_kwargs)\n\n    return True\n"
  },
  {
    "path": "tests/utils/plots.py",
    "content": "import dpath\n\ndpath.options.ALLOW_EMPTY_STRING_KEYS = True\n\n\ndef get_plot(plots_data, revision, typ=\"sources\", file=None, endkey=\"data\"):\n    if file is not None:\n        return dpath.get(plots_data, [revision, typ, \"data\", file, endkey])\n    return dpath.get(plots_data, [revision, typ, endkey])\n"
  }
]